upgini 1.2.135a2__py3-none-any.whl → 1.2.136__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.135a2"
1
+ __version__ = "1.2.136"
upgini/dataset.py CHANGED
@@ -71,6 +71,7 @@ class Dataset:
71
71
  date_column: Optional[str] = None,
72
72
  id_columns: Optional[List[str]] = None,
73
73
  is_imbalanced: bool = False,
74
+ dropped_columns: Optional[List[str]] = None,
74
75
  random_state: Optional[int] = None,
75
76
  sample_config: Optional[SampleConfig] = None,
76
77
  rest_client: Optional[_RestClient] = None,
@@ -118,6 +119,7 @@ class Dataset:
118
119
  self.is_imbalanced: bool = False
119
120
  self.id_columns = id_columns
120
121
  self.is_imbalanced = is_imbalanced
122
+ self.dropped_columns = dropped_columns
121
123
  self.date_column = date_column
122
124
  if logger is not None:
123
125
  self.logger = logger
@@ -285,6 +287,7 @@ class Dataset:
285
287
  for key in search_group
286
288
  if key in self.columns_renaming
287
289
  and not self.columns_renaming.get(key).endswith(EmailSearchKeyConverter.ONE_DOMAIN_SUFFIX)
290
+ and not self.columns_renaming.get(key) == "current_date"
288
291
  }
289
292
  ipv4_column = self.etalon_def_checked.get(FileColumnMeaningType.IP_ADDRESS.value)
290
293
  if (
@@ -475,6 +478,7 @@ class Dataset:
475
478
  hierarchicalGroupKeys=self.hierarchical_group_keys,
476
479
  hierarchicalSubgroupKeys=self.hierarchical_subgroup_keys,
477
480
  taskType=self.task_type,
481
+ droppedColumns=self.dropped_columns,
478
482
  )
479
483
 
480
484
  @staticmethod
@@ -814,7 +814,7 @@ class FeaturesEnricher(TransformerMixin):
814
814
  keep_input=keep_input,
815
815
  )
816
816
  if TARGET in result.columns:
817
- result.drop(columns=TARGET, inplace=True)
817
+ result = result.drop(columns=TARGET)
818
818
  self.logger.info("Transform finished successfully")
819
819
  search_progress = SearchProgress(100.0, ProgressStage.FINISHED)
820
820
  if progress_bar is not None:
@@ -1737,6 +1737,10 @@ class FeaturesEnricher(TransformerMixin):
1737
1737
 
1738
1738
  self.logger.info(f"Excluding search keys: {excluding_search_keys}")
1739
1739
 
1740
+ file_meta = self._search_task.get_file_metadata(trace_id)
1741
+ fit_dropped_features = self.fit_dropped_features or file_meta.droppedColumns or []
1742
+ original_dropped_features = [columns_renaming.get(f, f) for f in fit_dropped_features]
1743
+
1740
1744
  client_features = [
1741
1745
  c
1742
1746
  for c in validated_X.columns.to_list()
@@ -1744,7 +1748,7 @@ class FeaturesEnricher(TransformerMixin):
1744
1748
  and c
1745
1749
  not in (
1746
1750
  excluding_search_keys
1747
- + list(self.fit_dropped_features)
1751
+ + original_dropped_features
1748
1752
  + [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
1749
1753
  )
1750
1754
  ]
@@ -2955,8 +2959,10 @@ if response.status_code == 200:
2955
2959
  trace_id: str,
2956
2960
  is_transform: bool = False,
2957
2961
  ):
2958
- fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
2959
- original_dropped_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_dropped_features]
2962
+ file_meta = self._search_task.get_file_metadata(trace_id)
2963
+ fit_dropped_features = self.fit_dropped_features or file_meta.droppedColumns or []
2964
+ fit_input_columns = [c.originalName for c in file_meta.columns]
2965
+ original_dropped_features = [self.fit_columns_renaming.get(c, c) for c in fit_dropped_features]
2960
2966
  new_columns_on_transform = [
2961
2967
  c for c in validated_Xy.columns if c not in fit_input_columns and c not in original_dropped_features
2962
2968
  ]
@@ -3380,6 +3386,7 @@ if response.status_code == 200:
3380
3386
  cv_type=self.cv,
3381
3387
  id_columns=self.__get_renamed_id_columns(),
3382
3388
  is_imbalanced=self.imbalanced,
3389
+ dropped_columns=[self.fit_columns_renaming.get(f, f) for f in self.fit_dropped_features],
3383
3390
  date_column=self._get_date_column(self.fit_search_keys),
3384
3391
  date_format=self.date_format,
3385
3392
  random_state=self.random_state,
upgini/metadata.py CHANGED
@@ -252,6 +252,7 @@ class FileMetadata(BaseModel):
252
252
  rowsCount: Optional[int] = None
253
253
  checksumMD5: Optional[str] = None
254
254
  digest: Optional[str] = None
255
+ droppedColumns: Optional[List[str]] = None
255
256
 
256
257
  def column_by_name(self, name: str) -> Optional[FileColumnMetadata]:
257
258
  for c in self.columns:
@@ -46,7 +46,7 @@ class FeaturesValidator:
46
46
 
47
47
  columns_renaming = columns_renaming or {}
48
48
 
49
- if one_hot_encoded_features:
49
+ if one_hot_encoded_features and len(one_hot_encoded_features) > 1:
50
50
  msg = bundle.get("one_hot_encoded_features").format(
51
51
  [columns_renaming.get(f, f) for f in one_hot_encoded_features]
52
52
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: upgini
3
- Version: 1.2.135a2
3
+ Version: 1.2.136
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,11 +1,11 @@
1
- upgini/__about__.py,sha256=oWrwInIpUPiizpYZx5aK8eS5-xGS-LZCzvEcZVkCUw8,26
1
+ upgini/__about__.py,sha256=geG4WOLTgtin9k9NxkyjPJlJc57re5T0iGsOwFw3cp4,24
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=Nm2ZmwyQqvTnymYpGUwyJWy7y2ebXlHMyYmGeGcyA_s,31652
4
+ upgini/dataset.py,sha256=4bWKKFdFbFvdcb-JS4Nt2Je8eKqPg5QRLlmchQuY2aw,31870
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=9h2EBrPhRzh6yrfHV4p3l_GSWPptVAjKLfjqv08VA5E,235490
6
+ upgini/features_enricher.py,sha256=9pBxZKWsXF_IhZXnswCQZGU-cqOOT1EqH-FPu1zJo4E,235950
7
7
  upgini/http.py,sha256=-J_wOpnwVnT0ebPC6sOs6fN3AWtCD0LJLu6nlYmxaqk,44348
8
- upgini/metadata.py,sha256=H3wiN37k-yqWZgbPD0tJzx8DzaCIkgmX5cybhByQWLg,12619
8
+ upgini/metadata.py,sha256=soRxxAG9gpOk77oOxCl35f9nWPDTaYyJEHgwKWhkS84,12666
9
9
  upgini/metrics.py,sha256=KCPE_apPN-9BIdv6GqASbJVaB_gBcy8wzNApAcyaGo4,46020
10
10
  upgini/search_task.py,sha256=5mL_qV5mVtDkIumM9xCOgfa9Lc2B8mxJ1qI21iaScnQ,18656
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
@@ -58,7 +58,7 @@ upgini/utils/display_utils.py,sha256=MoTqXZJvC6pAqgOaI3V0FG-IU_LnMfrn4TDcNvUqsdg
58
58
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
59
59
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
60
60
  upgini/utils/feature_info.py,sha256=SQTRbSxJDkh2G2c0KGBmOv8f69gVzWbTtcXn0_2Qb-8,7945
61
- upgini/utils/features_validator.py,sha256=RdRMisZYeJ8HVCKiKxqSyWjoLf_MsZNXxHIuWf6H2g4,4939
61
+ upgini/utils/features_validator.py,sha256=pAyS57-jYlihMOhweM12GOvCTJC13fTIJ9lQwgoON5c,4977
62
62
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
63
63
  upgini/utils/hash_utils.py,sha256=mP2yHyzvDNdpa5g3B4MHzulxBeEz_ZSoGl1YF_VnAyE,5538
64
64
  upgini/utils/ip_utils.py,sha256=wmnnwVQdjX9o1cNQw6VQMk6maHhvsq6hNsZBYf9knrw,6585
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=CihpV6SC95HwtlMH60rGAUzVDa4Id0Bva8ySprmNHlE,
74
74
  upgini/utils/track_info.py,sha256=NDKeQTUlZaYp15UoP-xLKGoDoJQ0drbDMwB0g9R0PUg,6427
75
75
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
76
76
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
77
- upgini-1.2.135a2.dist-info/METADATA,sha256=ybOlAv1CV6-gct4fhH9Gqkd-5CtTk2xS9bo58LTnHWw,51135
78
- upgini-1.2.135a2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
79
- upgini-1.2.135a2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
80
- upgini-1.2.135a2.dist-info/RECORD,,
77
+ upgini-1.2.136.dist-info/METADATA,sha256=iszl9ghp_J6AHfTIZCtf5PliRGfKprpYxdwPfSUsEPg,51133
78
+ upgini-1.2.136.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
79
+ upgini-1.2.136.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
80
+ upgini-1.2.136.dist-info/RECORD,,