upgini 1.2.84__py3-none-any.whl → 1.2.85a3857.dev1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.84"
1
+ __version__ = "1.2.85a3857.dev1"
upgini/metrics.py CHANGED
@@ -391,9 +391,7 @@ class EstimatorWrapper:
391
391
  self.converted_to_int.append(c)
392
392
  self.cat_features.remove(c)
393
393
  elif is_float_dtype(x[c]) or (x[c].dtype == "category" and is_float_dtype(x[c].cat.categories)):
394
- self.logger.info(
395
- f"Convert float cat feature {c} to string"
396
- )
394
+ self.logger.info(f"Convert float cat feature {c} to string")
397
395
  x[c] = x[c].astype(str)
398
396
  self.converted_to_str.append(c)
399
397
  elif x[c].dtype not in ["category", "int64"]:
@@ -694,7 +692,15 @@ class CatBoostWrapper(EstimatorWrapper):
694
692
  x[c] = x[c].fillna(np.nan)
695
693
  elif x[c].dtype != "category":
696
694
  x[c] = x[c].fillna("NA")
697
- params["cat_features"] = self.cat_features
695
+ if isinstance(self.cv, TimeSeriesSplit) or isinstance(self.cv, BlockedTimeSeriesSplit):
696
+ self.logger.info("Using time-aware encoder for CatBoost")
697
+ encoder = CatBoostEncoder(random_state=DEFAULT_RANDOM_STATE, cols=self.cat_features, return_df=True)
698
+ encoded = encoder.fit_transform(x[self.cat_features].astype("object"), y)
699
+ x[self.cat_features] = encoded
700
+ self.cat_encoder = encoder
701
+ else:
702
+ self.cat_encoder = None
703
+ params["cat_features"] = self.cat_features
698
704
 
699
705
  return x, y, groups, params
700
706
 
@@ -738,7 +744,16 @@ class CatBoostWrapper(EstimatorWrapper):
738
744
  x[c] = x[c].fillna(np.nan)
739
745
  elif x[c].dtype != "category":
740
746
  x[c] = x[c].fillna("NA")
741
- params["cat_features"] = self.cat_features
747
+ if (
748
+ isinstance(self.cv, TimeSeriesSplit)
749
+ or isinstance(self.cv, BlockedTimeSeriesSplit)
750
+ and self.cat_encoder is not None
751
+ ):
752
+ self.logger.info("Using time-aware encoder for CatBoost")
753
+ encoded = self.cat_encoder.transform(x[self.cat_features].astype("object"), y)
754
+ x[self.cat_features] = encoded
755
+ else:
756
+ params["cat_features"] = self.cat_features
742
757
 
743
758
  return x, y, params
744
759
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.84
3
+ Version: 1.2.85a3857.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=aeTule-I84yrLN0UOJAi64jLCLq3VVYxtUewFZihoH8,23
1
+ upgini/__about__.py,sha256=fECI7PUZQG8IW2eHjUqgqHVtT40sMjfMgzLhuxKuQFA,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=fRtqSkXNONLnPe6cCL967GMt349FTIpXzy_u8LUKncw,35354
@@ -6,7 +6,7 @@ upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
6
  upgini/features_enricher.py,sha256=2ryADtOVEEebuUBhimusvnBzGxUkdTaqpEh2F1PqHSs,212719
7
7
  upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
8
8
  upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
- upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
9
+ upgini/metrics.py,sha256=zRrRpNqjSTubsyKPi0_jbHjE8QO_YqyHWtt1B5MfVH8,44086
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.84.dist-info/METADATA,sha256=jn2nCo_PdZx_hdSQYi9RFAFY7XyzgLlvz-U-WSY9Hm0,49162
74
- upgini-1.2.84.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.84.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.84.dist-info/RECORD,,
73
+ upgini-1.2.85a3857.dev1.dist-info/METADATA,sha256=XycmCsMeqC_7hsO0YzR0E8b4eGnIcD-MBzuFvB4T24s,49172
74
+ upgini-1.2.85a3857.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.85a3857.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.85a3857.dev1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.24.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any