upgini 1.2.28__py3-none-any.whl → 1.2.29a1__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.28"
1
+ __version__ = "1.2.29a1"
upgini/resource_bundle/strings.properties CHANGED
@@ -82,7 +82,7 @@ unregistered_only_personal_keys=Only personal search keys used. Api_key from pro
82
82
  search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
83
83
  numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
84
84
  unsupported_search_key_type=Unsupported type of key in search_keys: {}
85
- unsupported_type_of_search_key=Unsupported type of search key: {}. It should be a member of SearcKey
85
+ unsupported_type_of_search_key=Unsupported type of search key: {}. It should be a member of SearchKey
86
86
  search_key_country_and_country_code=SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
87
87
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
88
88
  single_constant_search_key=Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
upgini/utils/datetime_utils.py CHANGED
@@ -114,16 +114,50 @@ class DateTimeSearchKeyConverter:
114
114
  seconds_without_na = df[seconds].dropna()
115
115
  if (seconds_without_na != 0).any() and seconds_without_na.nunique() > 1:
116
116
  self.logger.info("Time found in date search key. Add extra features based on time")
117
- seconds_in_day = 60 * 60 * 24
118
- orders = [1, 2, 24, 48]
119
- for order in orders:
120
- sin_feature = f"datetime_time_sin_{order}"
121
- cos_feature = f"datetime_time_cos_{order}"
122
- df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
123
- df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
117
+
118
+ # Extract time components
119
+ df["second"] = df[self.date_column].dt.second
120
+ df["minute"] = df[self.date_column].dt.minute
121
+ df["hour"] = df[self.date_column].dt.hour
122
+ df["day"] = df[self.date_column].dt.day
123
+ df["month"] = df[self.date_column].dt.month
124
+
125
+ # Get the actual number of days in each month
126
+ df["days_in_month"] = df[self.date_column].dt.days_in_month
127
+
128
+ # Define function to apply sine and cosine transformations
129
+ def add_cyclical_features(df, column, period):
130
+ sin_feature = f"datetime_{column}_sin_{period}"
131
+ cos_feature = f"datetime_{column}_cos_{period}"
132
+ df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
133
+ df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
124
134
  self.generated_features.append(sin_feature)
125
135
  self.generated_features.append(cos_feature)
126
136
 
137
+ # Apply transformations using vectorized operations
138
+ add_cyclical_features(df, "second", 60) # Seconds in a minute
139
+ add_cyclical_features(df, "minute", 60) # Minutes in an hour
140
+ add_cyclical_features(df, "hour", 24) # Hours in a day
141
+ add_cyclical_features(df, "day", df["days_in_month"]) # Days in the specific month
142
+ add_cyclical_features(df, "month", 12) # Months in a year
143
+
144
+ # Extract quarter information
145
+ df["quarter"] = df[self.date_column].dt.quarter
146
+
147
+ # Apply transformations for quarters
148
+ add_cyclical_features(df, "quarter", 4) # Quarters in a year
149
+ # seconds_in_day = 60 * 60 * 24
150
+ # orders = [1, 2, 24, 48]
151
+ # for order in orders:
152
+ # sin_feature = f"datetime_time_sin_{order}"
153
+ # cos_feature = f"datetime_time_cos_{order}"
154
+ # df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
155
+ # df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
156
+ # self.generated_features.append(sin_feature)
157
+ # self.generated_features.append(cos_feature)
158
+
159
+ df.drop(columns=["second", "minute", "hour", "day", "month", "days_in_month", "quarter"])
160
+
127
161
  df.drop(columns=seconds, inplace=True)
128
162
 
129
163
  if keep_time:
{upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.28
3
+ Version: 1.2.29a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
{upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=sZoYrdh97RjbwC1rg-3mt6kqvjxTxWL7ejhDDrG6mVM,23
1
+ upgini/__about__.py,sha256=YYK80OZpX7RU3u94Dr87MhqDbFFuOvIk3V5if415BD8,25
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
30
30
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
31
31
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
32
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=wn98wl2DNFPaGTMSdZAVr43P6t97i74PCqqwa762-V0,26673
33
+ upgini/resource_bundle/strings.properties,sha256=fOAeLTsnx8xvJK-7RPFXprATG0n56jeCdse8sQTuVX8,26674
34
34
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
35
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -43,7 +43,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
43
43
  upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
44
44
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
45
45
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
46
- upgini/utils/datetime_utils.py,sha256=a8X4jX2y3-6E7ZNZIG5z61qfzCvsvaNEjR1Bi5KUqfM,11279
46
+ upgini/utils/datetime_utils.py,sha256=Aw1e8e137PF_KMCUQrKPmnnukYjGJWvNGKSwXzsVLRs,13051
47
47
  upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
48
48
  upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
49
49
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
@@ -58,7 +58,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
58
58
  upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
59
59
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
60
60
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
61
- upgini-1.2.28.dist-info/METADATA,sha256=qGINKPrh43Q1LFXWRlS1v6CSpME6rOmScAy0ShKxL_0,48578
62
- upgini-1.2.28.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
63
- upgini-1.2.28.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
64
- upgini-1.2.28.dist-info/RECORD,,
61
+ upgini-1.2.29a1.dist-info/METADATA,sha256=mVTEnNe3cUe5o_q3SoNHSydY6A8wv1LLMC2IpWkndMg,48580
62
+ upgini-1.2.29a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
63
+ upgini-1.2.29a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
64
+ upgini-1.2.29a1.dist-info/RECORD,,