upgini 1.2.47__py3-none-any.whl → 1.2.49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +15 -8
- upgini/utils/feature_info.py +7 -6
- {upgini-1.2.47.dist-info → upgini-1.2.49.dist-info}/METADATA +1 -1
- {upgini-1.2.47.dist-info → upgini-1.2.49.dist-info}/RECORD +7 -7
- {upgini-1.2.47.dist-info → upgini-1.2.49.dist-info}/WHEEL +0 -0
- {upgini-1.2.47.dist-info → upgini-1.2.49.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.47"
|
|
1
|
+
__version__ = "1.2.49"
|
upgini/features_enricher.py
CHANGED
|
@@ -400,7 +400,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
400
400
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
401
401
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
402
402
|
search_id_callback: Optional[Callable[[str], Any]] = None,
|
|
403
|
-
select_features: bool = False,
|
|
403
|
+
select_features: bool = True,
|
|
404
404
|
**kwargs,
|
|
405
405
|
):
|
|
406
406
|
"""Fit to data.
|
|
@@ -543,7 +543,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
543
543
|
estimator: Optional[Any] = None,
|
|
544
544
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
545
545
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
546
|
-
select_features: bool = False,
|
|
546
|
+
select_features: bool = True,
|
|
547
547
|
**kwargs,
|
|
548
548
|
) -> pd.DataFrame:
|
|
549
549
|
"""Fit to data, then transform it.
|
|
@@ -1486,8 +1486,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1486
1486
|
for c in X_sampled.columns.to_list()
|
|
1487
1487
|
if (
|
|
1488
1488
|
not self.fit_select_features
|
|
1489
|
-
or c in self.feature_names_
|
|
1490
|
-
or (self.fit_columns_renaming
|
|
1489
|
+
or c in set(self.feature_names_).union(self.id_columns or [])
|
|
1490
|
+
or (self.fit_columns_renaming or {}).get(c, c) in set(self.feature_names_).union(self.id_columns or [])
|
|
1491
1491
|
)
|
|
1492
1492
|
and c
|
|
1493
1493
|
not in (
|
|
@@ -2191,7 +2191,9 @@ if response.status_code == 200:
|
|
|
2191
2191
|
|
|
2192
2192
|
search_keys = self.search_keys.copy()
|
|
2193
2193
|
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
|
2194
|
-
self.search_keys.update(
|
|
2194
|
+
self.search_keys.update(
|
|
2195
|
+
{col: SearchKey.CUSTOM_KEY for col in self.id_columns if col not in self.search_keys}
|
|
2196
|
+
)
|
|
2195
2197
|
|
|
2196
2198
|
search_keys = self.__prepare_search_keys(
|
|
2197
2199
|
validated_X, search_keys, is_demo_dataset, is_transform=True, silent_mode=silent_mode
|
|
@@ -2716,8 +2718,12 @@ if response.status_code == 200:
|
|
|
2716
2718
|
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
|
2717
2719
|
id_columns = self.__get_renamed_id_columns()
|
|
2718
2720
|
if id_columns:
|
|
2719
|
-
self.fit_search_keys.update(
|
|
2720
|
-
|
|
2721
|
+
self.fit_search_keys.update(
|
|
2722
|
+
{col: SearchKey.CUSTOM_KEY for col in id_columns if col not in self.fit_search_keys}
|
|
2723
|
+
)
|
|
2724
|
+
self.search_keys.update(
|
|
2725
|
+
{col: SearchKey.CUSTOM_KEY for col in self.id_columns if col not in self.search_keys}
|
|
2726
|
+
)
|
|
2721
2727
|
self.runtime_parameters.properties["id_columns"] = ",".join(id_columns)
|
|
2722
2728
|
|
|
2723
2729
|
df, fintech_warnings = remove_fintech_duplicates(
|
|
@@ -3736,7 +3742,8 @@ if response.status_code == 200:
|
|
|
3736
3742
|
self.feature_names_.append(feature_meta.name)
|
|
3737
3743
|
self.feature_importances_.append(_round_shap_value(feature_meta.shap_value))
|
|
3738
3744
|
|
|
3739
|
-
|
|
3745
|
+
df_for_sample = features_df if feature_meta.name in features_df.columns else self.X
|
|
3746
|
+
feature_info = FeatureInfo.from_metadata(feature_meta, df_for_sample, is_client_feature)
|
|
3740
3747
|
features_info.append(feature_info.to_row(self.bundle))
|
|
3741
3748
|
features_info_without_links.append(feature_info.to_row_without_links(self.bundle))
|
|
3742
3749
|
internal_features_info.append(feature_info.to_internal_row(self.bundle))
|
upgini/utils/feature_info.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
1
|
import itertools
|
|
3
|
-
from
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
@@ -8,7 +8,6 @@ import pandas as pd
|
|
|
8
8
|
from upgini.metadata import FeaturesMetadataV2
|
|
9
9
|
from upgini.resource_bundle import ResourceBundle
|
|
10
10
|
|
|
11
|
-
|
|
12
11
|
LLM_SOURCE = "LLM with external data augmentation"
|
|
13
12
|
|
|
14
13
|
|
|
@@ -30,7 +29,9 @@ class FeatureInfo:
|
|
|
30
29
|
data_source_link: str
|
|
31
30
|
|
|
32
31
|
@staticmethod
|
|
33
|
-
def from_metadata(
|
|
32
|
+
def from_metadata(
|
|
33
|
+
feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame], is_client_feature: bool
|
|
34
|
+
) -> "FeatureInfo":
|
|
34
35
|
return FeatureInfo(
|
|
35
36
|
name=_get_name(feature_meta),
|
|
36
37
|
internal_name=_get_internal_name(feature_meta),
|
|
@@ -86,8 +87,8 @@ class FeatureInfo:
|
|
|
86
87
|
}
|
|
87
88
|
|
|
88
89
|
|
|
89
|
-
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: pd.DataFrame) -> str:
|
|
90
|
-
if feature_meta.name in data.columns:
|
|
90
|
+
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
|
|
91
|
+
if data is not None and feature_meta.name in data.columns:
|
|
91
92
|
feature_sample = np.random.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
|
|
92
93
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
|
93
94
|
feature_sample = [round(f, 4) for f in feature_sample]
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=qkDVgmmc_W7WSA8UFQzKzULlTX27bnHrBTAlSeTYqYs,23
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=QC3jncWS3wHe4CY7pWWDMO_3HKxGbi0EyPHXMdBtoQM,33456
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=8bhABqZ3aXIc_5WBVNqFnRNT_0fCNbLyPwXv7VXdygs,200350
|
|
7
7
|
upgini/http.py,sha256=danPeX7nTMa_70S-pk-4UUm5yOvXYlR84jgyjoHYBkU,43367
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
|
|
@@ -48,7 +48,7 @@ upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuM
|
|
|
48
48
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
49
49
|
upgini/utils/email_utils.py,sha256=GbnhHJn1nhUBytmK6PophYqaoq4t7Lp6i0-O0Gd3RV8,5265
|
|
50
50
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
51
|
-
upgini/utils/feature_info.py,sha256=
|
|
51
|
+
upgini/utils/feature_info.py,sha256=0rOXSyCj-sw-8migWP0ge8qrOzGU50dQvH0JUJUrDfQ,6766
|
|
52
52
|
upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
|
|
53
53
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
54
54
|
upgini/utils/ip_utils.py,sha256=VORRmtKlItcbBVVK5SiwXD7J-6Y5rn7UQ5m6WcBXt7E,5698
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
64
|
-
upgini-1.2.
|
|
65
|
-
upgini-1.2.
|
|
62
|
+
upgini-1.2.49.dist-info/METADATA,sha256=XZ11OqCR6UqQrqo2RsKAIZYdPIVmIcn61GxRJn6f9Ys,49055
|
|
63
|
+
upgini-1.2.49.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
64
|
+
upgini-1.2.49.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.49.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|