upgini 1.2.28__py3-none-any.whl → 1.2.29a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/resource_bundle/strings.properties +1 -1
- upgini/utils/datetime_utils.py +41 -7
- {upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/METADATA +1 -1
- {upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/RECORD +7 -7
- {upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/WHEEL +0 -0
- {upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.28"
|
|
1
|
+
__version__ = "1.2.29a1"
|
upgini/resource_bundle/strings.properties
CHANGED
|
@@ -82,7 +82,7 @@ unregistered_only_personal_keys=Only personal search keys used. Api_key from pro
|
|
|
82
82
|
search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
|
|
83
83
|
numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
|
|
84
84
|
unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
|
85
|
-
unsupported_type_of_search_key=Unsupported type of search key: {}. It should be a member of
|
|
85
|
+
unsupported_type_of_search_key=Unsupported type of search key: {}. It should be a member of SearchKey
|
|
86
86
|
search_key_country_and_country_code=SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
|
|
87
87
|
empty_search_key=Search key {} is empty. Please fill values or remove this search key
|
|
88
88
|
single_constant_search_key=Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -114,16 +114,50 @@ class DateTimeSearchKeyConverter:
|
|
|
114
114
|
seconds_without_na = df[seconds].dropna()
|
|
115
115
|
if (seconds_without_na != 0).any() and seconds_without_na.nunique() > 1:
|
|
116
116
|
self.logger.info("Time found in date search key. Add extra features based on time")
|
|
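117
|
-
seconds_in_day = 60 * 60 * 24
|
|
118
|
-
orders = [1, 2, 24, 48]
|
|
119
|
-
for order in orders:
|
|
120
|
-
sin_feature = f"datetime_time_sin_{order}"
|
|
121
|
-
cos_feature = f"datetime_time_cos_{order}"
|
|
122
|
-
df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
|
|
123
|
-
df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)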
|
|
117
|
+
|
|
118
|
+
# Extract time components
|
|
119
|
+
df["second"] = df[self.date_column].dt.second
|
|
120
|
+
df["minute"] = df[self.date_column].dt.minute
|
|
121
|
+
df["hour"] = df[self.date_column].dt.hour
|
|
122
|
+
df["day"] = df[self.date_column].dt.day
|
|
123
|
+
df["month"] = df[self.date_column].dt.month
|
|
124
|
+
|
|
125
|
+
# Get the actual number of days in each month
|
|
126
|
+
df["days_in_month"] = df[self.date_column].dt.days_in_month
|
|
127
|
+
|
|
128
|
+
# Define function to apply sine and cosine transformations
|
|
129
|
+
def add_cyclical_features(df, column, period):
|
|
130
|
+
sin_feature = f"datetime_{column}_sin_{period}"
|
|
131
|
+
cos_feature = f"datetime_{column}_cos_{period}"
|
|
132
|
+
df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
|
|
133
|
+
df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
|
|
124
134
|
self.generated_features.append(sin_feature)
|
|
125
135
|
self.generated_features.append(cos_feature)
|
|
126
136
|
|
|
137
|
+
# Apply transformations using vectorized operations
|
|
138
|
+
add_cyclical_features(df, "second", 60) # Seconds in a minute
|
|
139
|
+
add_cyclical_features(df, "minute", 60) # Minutes in an hour
|
|
140
|
+
add_cyclical_features(df, "hour", 24) # Hours in a day
|
|
141
|
+
add_cyclical_features(df, "day", df["days_in_month"]) # Days in the specific month
|
|
142
|
+
add_cyclical_features(df, "month", 12) # Months in a year
|
|
143
|
+
|
|
144
|
+
# Extract quarter information
|
|
145
|
+
df["quarter"] = df[self.date_column].dt.quarter
|
|
146
|
+
|
|
147
|
+
# Apply transformations for quarters
|
|
148
|
+
add_cyclical_features(df, "quarter", 4) # Quarters in a year
|
|
149
|
+
# seconds_in_day = 60 * 60 * 24
|
|
150
|
+
# orders = [1, 2, 24, 48]
|
|
151
|
+
# for order in orders:
|
|
152
|
+
# sin_feature = f"datetime_time_sin_{order}"
|
|
153
|
+
# cos_feature = f"datetime_time_cos_{order}"
|
|
154
|
+
# df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
|
|
155
|
+
# df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
|
|
156
|
+
# self.generated_features.append(sin_feature)
|
|
157
|
+
# self.generated_features.append(cos_feature)
|
|
158
|
+
|
|
159
|
+
df.drop(columns=["second", "minute", "hour", "day", "month", "days_in_month", "quarter"])
|
|
160
|
+
|
|
127
161
|
df.drop(columns=seconds, inplace=True)
|
|
128
162
|
|
|
129
163
|
if keep_time:
|
{upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=YYK80OZpX7RU3u94Dr87MhqDbFFuOvIk3V5if415BD8,25
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
|
|
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
30
30
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
33
|
+
upgini/resource_bundle/strings.properties,sha256=fOAeLTsnx8xvJK-7RPFXprATG0n56jeCdse8sQTuVX8,26674
|
|
34
34
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
35
35
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -43,7 +43,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
|
|
|
43
43
|
upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
|
|
44
44
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
45
45
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
46
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
46
|
+
upgini/utils/datetime_utils.py,sha256=Aw1e8e137PF_KMCUQrKPmnnukYjGJWvNGKSwXzsVLRs,13051
|
|
47
47
|
upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
|
|
48
48
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
49
49
|
upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
|
|
@@ -58,7 +58,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
58
58
|
upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
|
|
59
59
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
60
60
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
61
|
-
upgini-1.2.28.dist-info/METADATA,sha256=
|
|
62
|
-
upgini-1.2.28.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
63
|
-
upgini-1.2.28.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
64
|
-
upgini-1.2.28.dist-info/RECORD,,
|
|
61
|
+
upgini-1.2.29a1.dist-info/METADATA,sha256=mVTEnNe3cUe5o_q3SoNHSydY6A8wv1LLMC2IpWkndMg,48580
|
|
62
|
+
upgini-1.2.29a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
63
|
+
upgini-1.2.29a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
64
|
+
upgini-1.2.29a1.dist-info/RECORD,,
|
|
{upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/WHEEL
File without changes
|
|
{upgini-1.2.28.dist-info → upgini-1.2.29a1.dist-info}/licenses/LICENSE
File without changes
|