upgini 1.1.288a0__py3-none-any.whl → 1.1.290__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/date.py +17 -7
- upgini/utils/datetime_utils.py +15 -8
- {upgini-1.1.288a0.dist-info → upgini-1.1.290.dist-info}/METADATA +1 -1
- {upgini-1.1.288a0.dist-info → upgini-1.1.290.dist-info}/RECORD +7 -7
- {upgini-1.1.288a0.dist-info → upgini-1.1.290.dist-info}/WHEEL +0 -0
- {upgini-1.1.288a0.dist-info → upgini-1.1.290.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.288a0"
|
|
1
|
+
__version__ = "1.1.290"
|
upgini/autofe/date.py
CHANGED
|
@@ -21,6 +21,20 @@ class DateDiffMixin(BaseModel):
|
|
|
21
21
|
|
|
22
22
|
return pd.to_datetime(x, unit=unit)
|
|
23
23
|
|
|
24
|
+
def _convert_diff_to_unit(self, diff: Union[pd.Series, TimedeltaArray]) -> Union[pd.Series, TimedeltaArray]:
|
|
25
|
+
if self.diff_unit == "D":
|
|
26
|
+
if isinstance(diff, pd.Series) and diff.dtype == "object":
|
|
27
|
+
return diff.apply(lambda x: None if isinstance(x, float) and np.isnan(x) else x.days)
|
|
28
|
+
else:
|
|
29
|
+
return diff / np.timedelta64(1, self.diff_unit)
|
|
30
|
+
elif self.diff_unit == "Y":
|
|
31
|
+
if isinstance(diff, TimedeltaArray):
|
|
32
|
+
return (diff / 365 / 24 / 60 / 60 / 10**9).astype(int)
|
|
33
|
+
else:
|
|
34
|
+
return (diff / 365 / 24 / 60 / 60 / 10**9).dt.nanoseconds
|
|
35
|
+
else:
|
|
36
|
+
raise Exception(f"Unsupported difference unit: {self.diff_unit}")
|
|
37
|
+
|
|
24
38
|
|
|
25
39
|
class DateDiff(PandasOperand, DateDiffMixin):
|
|
26
40
|
name = "date_diff"
|
|
@@ -41,7 +55,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
41
55
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
42
56
|
left = self._convert_to_date(left, self.left_unit)
|
|
43
57
|
right = self._convert_to_date(right, self.right_unit)
|
|
44
|
-
|
|
58
|
+
diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
|
|
59
|
+
return self.__replace_negative(diff)
|
|
45
60
|
|
|
46
61
|
def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
|
|
47
62
|
x[x < 0] = None
|
|
@@ -107,12 +122,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
107
122
|
return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
|
|
108
123
|
|
|
109
124
|
def _diff(self, x: TimedeltaArray):
|
|
110
|
-
|
|
111
|
-
x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
|
|
112
|
-
elif self.diff_unit == "M":
|
|
113
|
-
raise Exception("Unsupported difference unit: Month")
|
|
114
|
-
else:
|
|
115
|
-
x = x / np.timedelta64(1, self.diff_unit)
|
|
125
|
+
x = self._convert_diff_to_unit(x)
|
|
116
126
|
return x[x > 0]
|
|
117
127
|
|
|
118
128
|
def _agg(self, x):
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
|
|
|
82
82
|
elif isinstance(df[self.date_column].values[0], datetime.date):
|
|
83
83
|
df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
|
|
84
84
|
elif is_period_dtype(df[self.date_column]):
|
|
85
|
-
df[self.date_column] =
|
|
85
|
+
df[self.date_column] = df[self.date_column].dt.to_timestamp()
|
|
86
86
|
elif is_numeric_dtype(df[self.date_column]):
|
|
87
87
|
# 315532801 - 2524608001 - seconds
|
|
88
88
|
# 315532801000 - 2524608001000 - milliseconds
|
|
89
89
|
# 315532801000000 - 2524608001000000 - microseconds
|
|
90
90
|
# 315532801000000000 - 2524608001000000000 - nanoseconds
|
|
91
|
-
if df[self.date_column].apply(lambda x: 10**16 < x).all():
|
|
91
|
+
if df[self.date_column].apply(lambda x: 10 ** 16 < x).all():
|
|
92
92
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
|
|
93
|
-
elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
|
|
93
|
+
elif df[self.date_column].apply(lambda x: 10 ** 14 < x < 10 ** 16).all():
|
|
94
94
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
|
|
95
|
-
elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
|
|
95
|
+
elif df[self.date_column].apply(lambda x: 10 ** 11 < x < 10 ** 14).all():
|
|
96
96
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
|
|
97
|
-
elif df[self.date_column].apply(lambda x: 0 < x < 10**11).all():
|
|
97
|
+
elif df[self.date_column].apply(lambda x: 0 < x < 10 ** 11).all():
|
|
98
98
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
|
|
99
99
|
else:
|
|
100
100
|
msg = self.bundle.get("unsupported_date_type").format(self.date_column)
|
|
@@ -185,7 +185,10 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
|
|
|
185
185
|
def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
|
|
186
186
|
df = df.copy()
|
|
187
187
|
seconds = "datetime_seconds"
|
|
188
|
-
|
|
188
|
+
if is_period_dtype(df[date_col]):
|
|
189
|
+
df[date_col] = df[date_col].dt.to_timestamp()
|
|
190
|
+
else:
|
|
191
|
+
df[date_col] = pd.to_datetime(df[date_col])
|
|
189
192
|
df[date_col] = df[date_col].dt.tz_localize(None)
|
|
190
193
|
df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
|
|
191
194
|
|
|
@@ -248,7 +251,9 @@ def validate_dates_distribution(
|
|
|
248
251
|
if col in search_keys:
|
|
249
252
|
continue
|
|
250
253
|
try:
|
|
251
|
-
if pd.__version__ >= "2.0.0":
|
|
254
|
+
if is_period_dtype(X[col]):
|
|
255
|
+
pass
|
|
256
|
+
elif pd.__version__ >= "2.0.0":
|
|
252
257
|
# Format mixed to avoid massive warnings
|
|
253
258
|
pd.to_datetime(X[col], format="mixed")
|
|
254
259
|
else:
|
|
@@ -261,7 +266,9 @@ def validate_dates_distribution(
|
|
|
261
266
|
if maybe_date_col is None:
|
|
262
267
|
return
|
|
263
268
|
|
|
264
|
-
if pd.__version__ >= "2.0.0":
|
|
269
|
+
if is_period_dtype(X[maybe_date_col]):
|
|
270
|
+
dates = X[maybe_date_col].dt.to_timestamp().dt.date
|
|
271
|
+
elif pd.__version__ >= "2.0.0":
|
|
265
272
|
dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
|
|
266
273
|
else:
|
|
267
274
|
dates = pd.to_datetime(X[maybe_date_col]).dt.date
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=14vdpqxzfW78z-avOfaHW_WatncVnDB1LrWtzjXSghM,24
|
|
2
2
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
|
|
@@ -15,7 +15,7 @@ upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo
|
|
|
15
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
upgini/autofe/all_operands.py,sha256=7UyvmmqGSqQu4kDgoFwQRKY__b9xKDk3Fpp2-H8A7AA,2399
|
|
17
17
|
upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
|
|
18
|
-
upgini/autofe/date.py,sha256=
|
|
18
|
+
upgini/autofe/date.py,sha256=qzk0NT332Q0vR1eRwTuNiMSrGE3ulh6Ic3QLBZqSdvw,7284
|
|
19
19
|
upgini/autofe/feature.py,sha256=_V9B74B3ue7eAYXSOt9JKhVC9klkAKks22MwnBRye_w,12487
|
|
20
20
|
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
21
21
|
upgini/autofe/operand.py,sha256=JjEVT1U3kY9NDjUPMdoki7Oa8hMDG0-_h_NklVjIFyc,2882
|
|
@@ -41,7 +41,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
|
|
|
41
41
|
upgini/utils/country_utils.py,sha256=yE8oRgMpXuJxPfQm4fioY6dg6700HgVnHSk4Cv9sUyM,6511
|
|
42
42
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
43
43
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
44
|
-
upgini/utils/datetime_utils.py,sha256
|
|
44
|
+
upgini/utils/datetime_utils.py,sha256=Ujmu1ouwSFtG5SywQXJlmtDnGigAnIWPdE5Vx5NvgUM,10951
|
|
45
45
|
upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
|
|
46
46
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
47
47
|
upgini/utils/email_utils.py,sha256=PLufTO97Pg9PPsNqB9agcM6M98MIxKUgIgNn2mVwSQ0,3520
|
|
@@ -56,7 +56,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
56
56
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
57
57
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
58
58
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
59
|
+
upgini-1.1.290.dist-info/METADATA,sha256=hCW_dnkYOifgc0LJpt9mktkSzt5Y038uJLk8k_cdIQM,48117
|
|
60
|
+
upgini-1.1.290.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
61
|
+
upgini-1.1.290.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
62
|
+
upgini-1.1.290.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|