upgini 1.2.20__py3-none-any.whl → 1.2.21__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.20"
1
+ __version__ = "1.2.21"
@@ -228,7 +228,7 @@ class FeaturesEnricher(TransformerMixin):
228
228
  ):
229
229
  self.bundle = get_custom_bundle(custom_bundle_config)
230
230
  self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
231
- if api_key is not None and not isinstance(api_key, str):
231
+ if self._api_key is not None and not isinstance(self._api_key, str):
232
232
  raise ValidationError(f"api_key should be `string`, but passed: `{api_key}`")
233
233
  self.rest_client = get_rest_client(endpoint, self._api_key, client_ip, client_visitorid)
234
234
  self.client_ip = client_ip
@@ -259,7 +259,9 @@ class FeaturesEnricher(TransformerMixin):
259
259
  self.eval_set: Optional[List[Tuple]] = None
260
260
  self.autodetected_search_keys: Dict[str, SearchKey] = {}
261
261
  self.imbalanced = False
262
- self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict, Dict]] = None
262
+ self.__cached_sampled_datasets: Dict[str, Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict, Dict]] = (
263
+ dict()
264
+ )
263
265
 
264
266
  validate_version(self.logger)
265
267
  self.search_keys = search_keys or {}
@@ -1583,9 +1585,11 @@ class FeaturesEnricher(TransformerMixin):
1583
1585
  progress_bar: Optional[ProgressBar],
1584
1586
  progress_callback: Optional[Callable[[SearchProgress], Any]],
1585
1587
  ) -> _SampledDataForMetrics:
1586
- if self.__cached_sampled_datasets is not None and is_input_same_as_fit and remove_outliers_calc_metrics is None:
1588
+ datasets_hash = hash_input(validated_X, validated_y, eval_set)
1589
+ cached_sampled_datasets = self.__cached_sampled_datasets.get(datasets_hash)
1590
+ if cached_sampled_datasets is not None and is_input_same_as_fit and remove_outliers_calc_metrics is None:
1587
1591
  self.logger.info("Cached enriched dataset found - use it")
1588
- return self.__get_sampled_cached_enriched(exclude_features_sources)
1592
+ return self.__get_sampled_cached_enriched(datasets_hash, exclude_features_sources)
1589
1593
  elif len(self.feature_importances_) == 0:
1590
1594
  self.logger.info("No external features selected. So use only input datasets for metrics calculation")
1591
1595
  return self.__sample_only_input(validated_X, validated_y, eval_set, is_demo_dataset)
@@ -1615,9 +1619,11 @@ class FeaturesEnricher(TransformerMixin):
1615
1619
  progress_callback,
1616
1620
  )
1617
1621
 
1618
- def __get_sampled_cached_enriched(self, exclude_features_sources: Optional[List[str]]) -> _SampledDataForMetrics:
1622
+ def __get_sampled_cached_enriched(
1623
+ self, datasets_hash: str, exclude_features_sources: Optional[List[str]]
1624
+ ) -> _SampledDataForMetrics:
1619
1625
  X_sampled, y_sampled, enriched_X, eval_set_sampled_dict, search_keys, columns_renaming = (
1620
- self.__cached_sampled_datasets
1626
+ self.__cached_sampled_datasets[datasets_hash]
1621
1627
  )
1622
1628
  if exclude_features_sources:
1623
1629
  enriched_X = enriched_X.drop(columns=exclude_features_sources, errors="ignore")
@@ -1692,7 +1698,9 @@ class FeaturesEnricher(TransformerMixin):
1692
1698
  eval_y_sampled = eval_xy_sampled[TARGET].copy()
1693
1699
  enriched_eval_X = eval_X_sampled
1694
1700
  eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
1695
- self.__cached_sampled_datasets = (
1701
+
1702
+ datasets_hash = hash_input(X_sampled, y_sampled, eval_set_sampled_dict)
1703
+ self.__cached_sampled_datasets[datasets_hash] = (
1696
1704
  X_sampled,
1697
1705
  y_sampled,
1698
1706
  enriched_X,
@@ -1770,7 +1778,8 @@ class FeaturesEnricher(TransformerMixin):
1770
1778
  enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
1771
1779
  eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
1772
1780
 
1773
- self.__cached_sampled_datasets = (
1781
+ datasets_hash = hash_input(self.X, self.y, self.eval_set)
1782
+ self.__cached_sampled_datasets[datasets_hash] = (
1774
1783
  X_sampled,
1775
1784
  y_sampled,
1776
1785
  enriched_X,
@@ -1895,7 +1904,8 @@ class FeaturesEnricher(TransformerMixin):
1895
1904
  y_sampled = enriched_Xy[TARGET].copy()
1896
1905
  enriched_X = enriched_Xy.drop(columns=TARGET)
1897
1906
 
1898
- self.__cached_sampled_datasets = (
1907
+ datasets_hash = hash_input(X_sampled, y_sampled, eval_set_sampled_dict)
1908
+ self.__cached_sampled_datasets[datasets_hash] = (
1899
1909
  X_sampled,
1900
1910
  y_sampled,
1901
1911
  enriched_X,
@@ -2426,7 +2436,7 @@ class FeaturesEnricher(TransformerMixin):
2426
2436
  ):
2427
2437
  self.warning_counter.reset()
2428
2438
  self.df_with_original_index = None
2429
- self.__cached_sampled_datasets = None
2439
+ self.__cached_sampled_datasets = dict()
2430
2440
  self.metrics = None
2431
2441
  self.fit_columns_renaming = None
2432
2442
  self.fit_dropped_features = set()
@@ -4196,6 +4206,8 @@ def hash_input(X: pd.DataFrame, y: Optional[pd.Series] = None, eval_set: Optiona
4196
4206
  if y is not None:
4197
4207
  hashed_objects.append(pd.util.hash_pandas_object(y, index=False).values)
4198
4208
  if eval_set is not None:
4209
+ if isinstance(eval_set, tuple):
4210
+ eval_set = [eval_set]
4199
4211
  for eval_X, eval_y in eval_set:
4200
4212
  hashed_objects.append(pd.util.hash_pandas_object(eval_X, index=False).values)
4201
4213
  hashed_objects.append(pd.util.hash_pandas_object(eval_y, index=False).values)
@@ -1,12 +1,11 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.20
3
+ Version: 1.2.21
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
7
7
  Project-URL: Source, https://github.com/upgini/upgini
8
8
  Author-email: Upgini Developers <madewithlove@upgini.com>
9
- License-Expression: BSD-3-Clause
10
9
  License-File: LICENSE
11
10
  Keywords: automl,data mining,data science,data search,machine learning
12
11
  Classifier: Development Status :: 5 - Production/Stable
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=nQtXpLTEUbMtAPecTV_hZAJZb9EhWc8glRv6hgKyvG4,23
1
+ upgini/__about__.py,sha256=3uF8dV6igE_qWszFViblw_rtJOgCwoi-rVuWcQrSR0A,23
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=RAeZFb_0VA23rHjWXo2hKEhoTxp0z9PdFD3KhCSOS_0,192779
6
+ upgini/features_enricher.py,sha256=OzDD04qNk9hReKDM6FQYhZy7zsxSpi9yaE6EgIZWJQU,193386
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=qHzZRmICFbLNCrmVqGkaBcjm91L2ERRZMppci36acV4,10085
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.2.20.dist-info/METADATA,sha256=NVxQ5AA2uDaCtzEDlqWqpG6uEOi2xufY3pqvO9XtdgY,48611
61
- upgini-1.2.20.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.20.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.20.dist-info/RECORD,,
60
+ upgini-1.2.21.dist-info/METADATA,sha256=A7mEGDHT-kvrB3LQ1TzPLMU9MKYOShh67XWo0ZdnMCQ,48578
61
+ upgini-1.2.21.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.21.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.21.dist-info/RECORD,,