upgini 1.2.29a1__py3-none-any.whl → 1.2.29a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/utils/datetime_utils.py +42 -36
- {upgini-1.2.29a1.dist-info → upgini-1.2.29a3.dist-info}/METADATA +1 -1
- {upgini-1.2.29a1.dist-info → upgini-1.2.29a3.dist-info}/RECORD +6 -6
- {upgini-1.2.29a1.dist-info → upgini-1.2.29a3.dist-info}/WHEEL +0 -0
- {upgini-1.2.29a1.dist-info → upgini-1.2.29a3.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.29a1"
|
|
1
|
+
__version__ = "1.2.29a3"
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -109,54 +109,60 @@ class DateTimeSearchKeyConverter:
|
|
|
109
109
|
|
|
110
110
|
df = self.clean_old_dates(df)
|
|
111
111
|
|
|
112
|
+
# Define function to apply sine and cosine transformations
|
|
113
|
+
def add_cyclical_features(df, column, period):
|
|
114
|
+
period_suffix = f"_{period}" if column != 'day_in_quarter' else ""
|
|
115
|
+
sin_feature = f"datetime_{column}_sin{period_suffix}"
|
|
116
|
+
cos_feature = f"datetime_{column}_cos{period_suffix}"
|
|
117
|
+
df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
|
|
118
|
+
df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
|
|
119
|
+
self.generated_features.append(sin_feature)
|
|
120
|
+
self.generated_features.append(cos_feature)
|
|
121
|
+
|
|
122
|
+
df["quarter"] = df[self.date_column].dt.quarter
|
|
123
|
+
|
|
124
|
+
# Calculate the start date of the quarter for each timestamp
|
|
125
|
+
df["quarter_start"] = df[self.date_column].dt.to_period("Q").dt.start_time
|
|
126
|
+
|
|
127
|
+
# Calculate the day in the quarter
|
|
128
|
+
df["day_in_quarter"] = (df[self.date_column] - df["quarter_start"]).dt.days + 1
|
|
129
|
+
|
|
130
|
+
# Vectorized calculation of days_in_quarter
|
|
131
|
+
quarter = df["quarter"]
|
|
132
|
+
start = df["quarter_start"]
|
|
133
|
+
year = start.dt.year
|
|
134
|
+
month = start.dt.month
|
|
135
|
+
|
|
136
|
+
quarter_end_year = np.where(quarter == 4, year + 1, year)
|
|
137
|
+
quarter_end_month = np.where(quarter == 4, 1, month + 3)
|
|
138
|
+
|
|
139
|
+
end = pd.to_datetime({"year": quarter_end_year, "month": quarter_end_month, "day": 1})
|
|
140
|
+
|
|
141
|
+
df["days_in_quarter"] = (end - start).dt.days
|
|
142
|
+
|
|
143
|
+
add_cyclical_features(df, "day_in_quarter", df["days_in_quarter"]) # Days in the quarter
|
|
144
|
+
|
|
145
|
+
df.drop(columns=["quarter", "quarter_start", "day_in_quarter", "days_in_quarter"], inplace=True)
|
|
146
|
+
|
|
112
147
|
df[seconds] = (df[self.date_column] - df[self.date_column].dt.floor("D")).dt.seconds
|
|
113
148
|
|
|
114
149
|
seconds_without_na = df[seconds].dropna()
|
|
115
150
|
if (seconds_without_na != 0).any() and seconds_without_na.nunique() > 1:
|
|
116
151
|
self.logger.info("Time found in date search key. Add extra features based on time")
|
|
117
152
|
|
|
118
|
-
# Extract
|
|
153
|
+
# Extract basic components
|
|
119
154
|
df["second"] = df[self.date_column].dt.second
|
|
120
155
|
df["minute"] = df[self.date_column].dt.minute
|
|
121
156
|
df["hour"] = df[self.date_column].dt.hour
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
# Get the actual number of days in each month
|
|
126
|
-
df["days_in_month"] = df[self.date_column].dt.days_in_month
|
|
127
|
-
|
|
128
|
-
# Define function to apply sine and cosine transformations
|
|
129
|
-
def add_cyclical_features(df, column, period):
|
|
130
|
-
sin_feature = f"datetime_{column}_sin_{period}"
|
|
131
|
-
cos_feature = f"datetime_{column}_cos_{period}"
|
|
132
|
-
df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
|
|
133
|
-
df[cos_feature] = np.cos(2 * np.pi * df[column] / period)
|
|
134
|
-
self.generated_features.append(sin_feature)
|
|
135
|
-
self.generated_features.append(cos_feature)
|
|
136
|
-
|
|
137
|
-
# Apply transformations using vectorized operations
|
|
157
|
+
|
|
158
|
+
# Apply cyclical transformations
|
|
138
159
|
add_cyclical_features(df, "second", 60) # Seconds in a minute
|
|
139
160
|
add_cyclical_features(df, "minute", 60) # Minutes in an hour
|
|
161
|
+
add_cyclical_features(df, "minute", 30) # Minutes in half an hour
|
|
140
162
|
add_cyclical_features(df, "hour", 24) # Hours in a day
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
# Extract quarter information
|
|
145
|
-
df["quarter"] = df[self.date_column].dt.quarter
|
|
146
|
-
|
|
147
|
-
# Apply transformations for quarters
|
|
148
|
-
add_cyclical_features(df, "quarter", 4) # Quarters in a year
|
|
149
|
-
# seconds_in_day = 60 * 60 * 24
|
|
150
|
-
# orders = [1, 2, 24, 48]
|
|
151
|
-
# for order in orders:
|
|
152
|
-
# sin_feature = f"datetime_time_sin_{order}"
|
|
153
|
-
# cos_feature = f"datetime_time_cos_{order}"
|
|
154
|
-
# df[sin_feature] = np.round(np.sin(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
|
|
155
|
-
# df[cos_feature] = np.round(np.cos(2 * np.pi * order * df[seconds] / seconds_in_day), 10)
|
|
156
|
-
# self.generated_features.append(sin_feature)
|
|
157
|
-
# self.generated_features.append(cos_feature)
|
|
158
|
-
|
|
159
|
-
df.drop(columns=["second", "minute", "hour", "day", "month", "days_in_month", "quarter"])
|
|
163
|
+
|
|
164
|
+
# Drop intermediate columns if not needed
|
|
165
|
+
df.drop(columns=["second", "minute", "hour"], inplace=True)
|
|
160
166
|
|
|
161
167
|
df.drop(columns=seconds, inplace=True)
|
|
162
168
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=g5pIOn0QIK7AYvLSK8cOcem2I_ZfKqz9pqOf071XTPQ,25
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
|
|
@@ -43,7 +43,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
|
|
|
43
43
|
upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
|
|
44
44
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
45
45
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
46
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
46
|
+
upgini/utils/datetime_utils.py,sha256=GiJhOXE4taDtC0PEBYloSN7jeLwN26AchOQnMSTUDpc,12996
|
|
47
47
|
upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
|
|
48
48
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
49
49
|
upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
|
|
@@ -58,7 +58,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
58
58
|
upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
|
|
59
59
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
60
60
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.29a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
63
|
-
upgini-1.2.29a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
64
|
-
upgini-1.2.29a1.dist-info/RECORD,,
|
|
61
|
+
upgini-1.2.29a3.dist-info/METADATA,sha256=Ek9umOS0JA_zCCYMq7PWIcokbDM59DB0lbwQappDk1g,48580
|
|
62
|
+
upgini-1.2.29a3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
63
|
+
upgini-1.2.29a3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
64
|
+
upgini-1.2.29a3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|