upgini 1.2.20__py3-none-any.whl → 1.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +22 -10
- {upgini-1.2.20.dist-info → upgini-1.2.21.dist-info}/METADATA +1 -2
- {upgini-1.2.20.dist-info → upgini-1.2.21.dist-info}/RECORD +6 -6
- {upgini-1.2.20.dist-info → upgini-1.2.21.dist-info}/WHEEL +0 -0
- {upgini-1.2.20.dist-info → upgini-1.2.21.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.20"
|
|
1
|
+
__version__ = "1.2.21"
|
upgini/features_enricher.py
CHANGED
|
@@ -228,7 +228,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
228
228
|
):
|
|
229
229
|
self.bundle = get_custom_bundle(custom_bundle_config)
|
|
230
230
|
self._api_key = api_key or os.environ.get(UPGINI_API_KEY)
|
|
231
|
-
if
|
|
231
|
+
if self._api_key is not None and not isinstance(self._api_key, str):
|
|
232
232
|
raise ValidationError(f"api_key should be `string`, but passed: `{api_key}`")
|
|
233
233
|
self.rest_client = get_rest_client(endpoint, self._api_key, client_ip, client_visitorid)
|
|
234
234
|
self.client_ip = client_ip
|
|
@@ -259,7 +259,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
259
259
|
self.eval_set: Optional[List[Tuple]] = None
|
|
260
260
|
self.autodetected_search_keys: Dict[str, SearchKey] = {}
|
|
261
261
|
self.imbalanced = False
|
|
262
|
-
self.__cached_sampled_datasets:
|
|
262
|
+
self.__cached_sampled_datasets: Dict[str, Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict, Dict]] = (
|
|
263
|
+
dict()
|
|
264
|
+
)
|
|
263
265
|
|
|
264
266
|
validate_version(self.logger)
|
|
265
267
|
self.search_keys = search_keys or {}
|
|
@@ -1583,9 +1585,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1583
1585
|
progress_bar: Optional[ProgressBar],
|
|
1584
1586
|
progress_callback: Optional[Callable[[SearchProgress], Any]],
|
|
1585
1587
|
) -> _SampledDataForMetrics:
|
|
1586
|
-
|
|
1588
|
+
datasets_hash = hash_input(validated_X, validated_y, eval_set)
|
|
1589
|
+
cached_sampled_datasets = self.__cached_sampled_datasets.get(datasets_hash)
|
|
1590
|
+
if cached_sampled_datasets is not None and is_input_same_as_fit and remove_outliers_calc_metrics is None:
|
|
1587
1591
|
self.logger.info("Cached enriched dataset found - use it")
|
|
1588
|
-
return self.__get_sampled_cached_enriched(exclude_features_sources)
|
|
1592
|
+
return self.__get_sampled_cached_enriched(datasets_hash, exclude_features_sources)
|
|
1589
1593
|
elif len(self.feature_importances_) == 0:
|
|
1590
1594
|
self.logger.info("No external features selected. So use only input datasets for metrics calculation")
|
|
1591
1595
|
return self.__sample_only_input(validated_X, validated_y, eval_set, is_demo_dataset)
|
|
@@ -1615,9 +1619,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1615
1619
|
progress_callback,
|
|
1616
1620
|
)
|
|
1617
1621
|
|
|
1618
|
-
def __get_sampled_cached_enriched(
|
|
1622
|
+
def __get_sampled_cached_enriched(
|
|
1623
|
+
self, datasets_hash: str, exclude_features_sources: Optional[List[str]]
|
|
1624
|
+
) -> _SampledDataForMetrics:
|
|
1619
1625
|
X_sampled, y_sampled, enriched_X, eval_set_sampled_dict, search_keys, columns_renaming = (
|
|
1620
|
-
self.__cached_sampled_datasets
|
|
1626
|
+
self.__cached_sampled_datasets[datasets_hash]
|
|
1621
1627
|
)
|
|
1622
1628
|
if exclude_features_sources:
|
|
1623
1629
|
enriched_X = enriched_X.drop(columns=exclude_features_sources, errors="ignore")
|
|
@@ -1692,7 +1698,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1692
1698
|
eval_y_sampled = eval_xy_sampled[TARGET].copy()
|
|
1693
1699
|
enriched_eval_X = eval_X_sampled
|
|
1694
1700
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
|
1695
|
-
|
|
1701
|
+
|
|
1702
|
+
datasets_hash = hash_input(X_sampled, y_sampled, eval_set_sampled_dict)
|
|
1703
|
+
self.__cached_sampled_datasets[datasets_hash] = (
|
|
1696
1704
|
X_sampled,
|
|
1697
1705
|
y_sampled,
|
|
1698
1706
|
enriched_X,
|
|
@@ -1770,7 +1778,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1770
1778
|
enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
|
|
1771
1779
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
|
1772
1780
|
|
|
1773
|
-
self.
|
|
1781
|
+
datasets_hash = hash_input(self.X, self.y, self.eval_set)
|
|
1782
|
+
self.__cached_sampled_datasets[datasets_hash] = (
|
|
1774
1783
|
X_sampled,
|
|
1775
1784
|
y_sampled,
|
|
1776
1785
|
enriched_X,
|
|
@@ -1895,7 +1904,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1895
1904
|
y_sampled = enriched_Xy[TARGET].copy()
|
|
1896
1905
|
enriched_X = enriched_Xy.drop(columns=TARGET)
|
|
1897
1906
|
|
|
1898
|
-
|
|
1907
|
+
datasets_hash = hash_input(X_sampled, y_sampled, eval_set_sampled_dict)
|
|
1908
|
+
self.__cached_sampled_datasets[datasets_hash] = (
|
|
1899
1909
|
X_sampled,
|
|
1900
1910
|
y_sampled,
|
|
1901
1911
|
enriched_X,
|
|
@@ -2426,7 +2436,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2426
2436
|
):
|
|
2427
2437
|
self.warning_counter.reset()
|
|
2428
2438
|
self.df_with_original_index = None
|
|
2429
|
-
self.__cached_sampled_datasets =
|
|
2439
|
+
self.__cached_sampled_datasets = dict()
|
|
2430
2440
|
self.metrics = None
|
|
2431
2441
|
self.fit_columns_renaming = None
|
|
2432
2442
|
self.fit_dropped_features = set()
|
|
@@ -4196,6 +4206,8 @@ def hash_input(X: pd.DataFrame, y: Optional[pd.Series] = None, eval_set: Optiona
|
|
|
4196
4206
|
if y is not None:
|
|
4197
4207
|
hashed_objects.append(pd.util.hash_pandas_object(y, index=False).values)
|
|
4198
4208
|
if eval_set is not None:
|
|
4209
|
+
if isinstance(eval_set, tuple):
|
|
4210
|
+
eval_set = [eval_set]
|
|
4199
4211
|
for eval_X, eval_y in eval_set:
|
|
4200
4212
|
hashed_objects.append(pd.util.hash_pandas_object(eval_X, index=False).values)
|
|
4201
4213
|
hashed_objects.append(pd.util.hash_pandas_object(eval_y, index=False).values)
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.20
|
|
3
|
+
Version: 1.2.21
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
7
7
|
Project-URL: Source, https://github.com/upgini/upgini
|
|
8
8
|
Author-email: Upgini Developers <madewithlove@upgini.com>
|
|
9
|
-
License-Expression: BSD-3-Clause
|
|
10
9
|
License-File: LICENSE
|
|
11
10
|
Keywords: automl,data mining,data science,data search,machine learning
|
|
12
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=3uF8dV6igE_qWszFViblw_rtJOgCwoi-rVuWcQrSR0A,23
|
|
2
2
|
upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=OzDD04qNk9hReKDM6FQYhZy7zsxSpi9yaE6EgIZWJQU,193386
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=qHzZRmICFbLNCrmVqGkaBcjm91L2ERRZMppci36acV4,10085
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
60
|
+
upgini-1.2.21.dist-info/METADATA,sha256=A7mEGDHT-kvrB3LQ1TzPLMU9MKYOShh67XWo0ZdnMCQ,48578
|
|
61
|
+
upgini-1.2.21.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.2.21.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.21.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|