upgini 1.2.135a3__py3-none-any.whl → 1.2.136__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/dataset.py +3 -0
- upgini/features_enricher.py +10 -3
- upgini/metadata.py +1 -0
- upgini/utils/features_validator.py +1 -1
- {upgini-1.2.135a3.dist-info → upgini-1.2.136.dist-info}/METADATA +1 -1
- {upgini-1.2.135a3.dist-info → upgini-1.2.136.dist-info}/RECORD +9 -9
- {upgini-1.2.135a3.dist-info → upgini-1.2.136.dist-info}/WHEEL +0 -0
- {upgini-1.2.135a3.dist-info → upgini-1.2.136.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.135a3"
|
|
1
|
+
__version__ = "1.2.136"
|
upgini/dataset.py
CHANGED
|
@@ -71,6 +71,7 @@ class Dataset:
|
|
|
71
71
|
date_column: Optional[str] = None,
|
|
72
72
|
id_columns: Optional[List[str]] = None,
|
|
73
73
|
is_imbalanced: bool = False,
|
|
74
|
+
dropped_columns: Optional[List[str]] = None,
|
|
74
75
|
random_state: Optional[int] = None,
|
|
75
76
|
sample_config: Optional[SampleConfig] = None,
|
|
76
77
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -118,6 +119,7 @@ class Dataset:
|
|
|
118
119
|
self.is_imbalanced: bool = False
|
|
119
120
|
self.id_columns = id_columns
|
|
120
121
|
self.is_imbalanced = is_imbalanced
|
|
122
|
+
self.dropped_columns = dropped_columns
|
|
121
123
|
self.date_column = date_column
|
|
122
124
|
if logger is not None:
|
|
123
125
|
self.logger = logger
|
|
@@ -476,6 +478,7 @@ class Dataset:
|
|
|
476
478
|
hierarchicalGroupKeys=self.hierarchical_group_keys,
|
|
477
479
|
hierarchicalSubgroupKeys=self.hierarchical_subgroup_keys,
|
|
478
480
|
taskType=self.task_type,
|
|
481
|
+
droppedColumns=self.dropped_columns,
|
|
479
482
|
)
|
|
480
483
|
|
|
481
484
|
@staticmethod
|
upgini/features_enricher.py
CHANGED
|
@@ -1737,6 +1737,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1737
1737
|
|
|
1738
1738
|
self.logger.info(f"Excluding search keys: {excluding_search_keys}")
|
|
1739
1739
|
|
|
1740
|
+
file_meta = self._search_task.get_file_metadata(trace_id)
|
|
1741
|
+
fit_dropped_features = self.fit_dropped_features or file_meta.droppedColumns or []
|
|
1742
|
+
original_dropped_features = [columns_renaming.get(f, f) for f in fit_dropped_features]
|
|
1743
|
+
|
|
1740
1744
|
client_features = [
|
|
1741
1745
|
c
|
|
1742
1746
|
for c in validated_X.columns.to_list()
|
|
@@ -1744,7 +1748,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1744
1748
|
and c
|
|
1745
1749
|
not in (
|
|
1746
1750
|
excluding_search_keys
|
|
1747
|
-
+
|
|
1751
|
+
+ original_dropped_features
|
|
1748
1752
|
+ [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
1749
1753
|
)
|
|
1750
1754
|
]
|
|
@@ -2955,8 +2959,10 @@ if response.status_code == 200:
|
|
|
2955
2959
|
trace_id: str,
|
|
2956
2960
|
is_transform: bool = False,
|
|
2957
2961
|
):
|
|
2958
|
-
|
|
2959
|
-
|
|
2962
|
+
file_meta = self._search_task.get_file_metadata(trace_id)
|
|
2963
|
+
fit_dropped_features = self.fit_dropped_features or file_meta.droppedColumns or []
|
|
2964
|
+
fit_input_columns = [c.originalName for c in file_meta.columns]
|
|
2965
|
+
original_dropped_features = [self.fit_columns_renaming.get(c, c) for c in fit_dropped_features]
|
|
2960
2966
|
new_columns_on_transform = [
|
|
2961
2967
|
c for c in validated_Xy.columns if c not in fit_input_columns and c not in original_dropped_features
|
|
2962
2968
|
]
|
|
@@ -3380,6 +3386,7 @@ if response.status_code == 200:
|
|
|
3380
3386
|
cv_type=self.cv,
|
|
3381
3387
|
id_columns=self.__get_renamed_id_columns(),
|
|
3382
3388
|
is_imbalanced=self.imbalanced,
|
|
3389
|
+
dropped_columns=[self.fit_columns_renaming.get(f, f) for f in self.fit_dropped_features],
|
|
3383
3390
|
date_column=self._get_date_column(self.fit_search_keys),
|
|
3384
3391
|
date_format=self.date_format,
|
|
3385
3392
|
random_state=self.random_state,
|
upgini/metadata.py
CHANGED
|
@@ -252,6 +252,7 @@ class FileMetadata(BaseModel):
|
|
|
252
252
|
rowsCount: Optional[int] = None
|
|
253
253
|
checksumMD5: Optional[str] = None
|
|
254
254
|
digest: Optional[str] = None
|
|
255
|
+
droppedColumns: Optional[List[str]] = None
|
|
255
256
|
|
|
256
257
|
def column_by_name(self, name: str) -> Optional[FileColumnMetadata]:
|
|
257
258
|
for c in self.columns:
|
|
@@ -46,7 +46,7 @@ class FeaturesValidator:
|
|
|
46
46
|
|
|
47
47
|
columns_renaming = columns_renaming or {}
|
|
48
48
|
|
|
49
|
-
if one_hot_encoded_features:
|
|
49
|
+
if one_hot_encoded_features and len(one_hot_encoded_features) > 1:
|
|
50
50
|
msg = bundle.get("one_hot_encoded_features").format(
|
|
51
51
|
[columns_renaming.get(f, f) for f in one_hot_encoded_features]
|
|
52
52
|
)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=geG4WOLTgtin9k9NxkyjPJlJc57re5T0iGsOwFw3cp4,24
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=4bWKKFdFbFvdcb-JS4Nt2Je8eKqPg5QRLlmchQuY2aw,31870
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=9pBxZKWsXF_IhZXnswCQZGU-cqOOT1EqH-FPu1zJo4E,235950
|
|
7
7
|
upgini/http.py,sha256=-J_wOpnwVnT0ebPC6sOs6fN3AWtCD0LJLu6nlYmxaqk,44348
|
|
8
|
-
upgini/metadata.py,sha256=
|
|
8
|
+
upgini/metadata.py,sha256=soRxxAG9gpOk77oOxCl35f9nWPDTaYyJEHgwKWhkS84,12666
|
|
9
9
|
upgini/metrics.py,sha256=KCPE_apPN-9BIdv6GqASbJVaB_gBcy8wzNApAcyaGo4,46020
|
|
10
10
|
upgini/search_task.py,sha256=5mL_qV5mVtDkIumM9xCOgfa9Lc2B8mxJ1qI21iaScnQ,18656
|
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
@@ -58,7 +58,7 @@ upgini/utils/display_utils.py,sha256=MoTqXZJvC6pAqgOaI3V0FG-IU_LnMfrn4TDcNvUqsdg
|
|
|
58
58
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
|
59
59
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
60
60
|
upgini/utils/feature_info.py,sha256=SQTRbSxJDkh2G2c0KGBmOv8f69gVzWbTtcXn0_2Qb-8,7945
|
|
61
|
-
upgini/utils/features_validator.py,sha256=
|
|
61
|
+
upgini/utils/features_validator.py,sha256=pAyS57-jYlihMOhweM12GOvCTJC13fTIJ9lQwgoON5c,4977
|
|
62
62
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
63
63
|
upgini/utils/hash_utils.py,sha256=mP2yHyzvDNdpa5g3B4MHzulxBeEz_ZSoGl1YF_VnAyE,5538
|
|
64
64
|
upgini/utils/ip_utils.py,sha256=wmnnwVQdjX9o1cNQw6VQMk6maHhvsq6hNsZBYf9knrw,6585
|
|
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=CihpV6SC95HwtlMH60rGAUzVDa4Id0Bva8ySprmNHlE,
|
|
|
74
74
|
upgini/utils/track_info.py,sha256=NDKeQTUlZaYp15UoP-xLKGoDoJQ0drbDMwB0g9R0PUg,6427
|
|
75
75
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
76
76
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
77
|
-
upgini-1.2.
|
|
78
|
-
upgini-1.2.135a3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
79
|
-
upgini-1.2.135a3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
80
|
-
upgini-1.2.135a3.dist-info/RECORD,,
|
|
77
|
+
upgini-1.2.136.dist-info/METADATA,sha256=iszl9ghp_J6AHfTIZCtf5PliRGfKprpYxdwPfSUsEPg,51133
|
|
78
|
+
upgini-1.2.136.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
79
|
+
upgini-1.2.136.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
80
|
+
upgini-1.2.136.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|