upgini 1.1.290__py3-none-any.whl → 1.1.290a3232.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/all_operands.py +2 -1
- upgini/autofe/unary.py +13 -0
- upgini/utils/datetime_utils.py +8 -15
- {upgini-1.1.290.dist-info → upgini-1.1.290a3232.post1.dist-info}/METADATA +1 -1
- {upgini-1.1.290.dist-info → upgini-1.1.290a3232.post1.dist-info}/RECORD +8 -8
- {upgini-1.1.290.dist-info → upgini-1.1.290a3232.post1.dist-info}/WHEEL +0 -0
- {upgini-1.1.290.dist-info → upgini-1.1.290a3232.post1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.290"
|
|
1
|
+
__version__ = "1.1.290a3232-1"
|
upgini/autofe/all_operands.py
CHANGED
|
@@ -4,7 +4,7 @@ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
|
4
4
|
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
|
|
5
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
6
6
|
from upgini.autofe.operand import Operand
|
|
7
|
-
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
|
|
7
|
+
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Scale, Sigmoid, Sqrt, Square
|
|
8
8
|
from upgini.autofe.vector import Mean, Sum
|
|
9
9
|
|
|
10
10
|
ALL_OPERANDS: Dict[str, Operand] = {
|
|
@@ -50,6 +50,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
50
50
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
51
51
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
52
52
|
DatePercentile(),
|
|
53
|
+
Scale(),
|
|
53
54
|
]
|
|
54
55
|
}
|
|
55
56
|
|
upgini/autofe/unary.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import pandas as pd
|
|
3
|
+
from sklearn.preprocessing import robust_scale
|
|
3
4
|
|
|
4
5
|
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
5
6
|
|
|
@@ -111,3 +112,15 @@ class Freq(PandasOperand):
|
|
|
111
112
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
112
113
|
value_counts = data.value_counts(normalize=True)
|
|
113
114
|
return self._loc(data, value_counts)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class Scale(PandasOperand, VectorizableMixin):
|
|
118
|
+
name = "scale"
|
|
119
|
+
is_unary = True
|
|
120
|
+
output_type = "float"
|
|
121
|
+
|
|
122
|
+
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
123
|
+
return pd.Series(robust_scale(data), index=data.index, name=data.name)
|
|
124
|
+
|
|
125
|
+
def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
|
|
126
|
+
return pd.DataFrame(robust_scale(data), index=data.index, columns=data.columns)
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
|
|
|
82
82
|
elif isinstance(df[self.date_column].values[0], datetime.date):
|
|
83
83
|
df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
|
|
84
84
|
elif is_period_dtype(df[self.date_column]):
|
|
85
|
-
df[self.date_column] = df[self.date_column].dt.to_timestamp()
|
|
85
|
+
df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
|
|
86
86
|
elif is_numeric_dtype(df[self.date_column]):
|
|
87
87
|
# 315532801 - 2524608001 - seconds
|
|
88
88
|
# 315532801000 - 2524608001000 - milliseconds
|
|
89
89
|
# 315532801000000 - 2524608001000000 - microseconds
|
|
90
90
|
# 315532801000000000 - 2524608001000000000 - nanoseconds
|
|
91
|
-
if df[self.date_column].apply(lambda x: 10
|
|
91
|
+
if df[self.date_column].apply(lambda x: 10**16 < x).all():
|
|
92
92
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
|
|
93
|
-
elif df[self.date_column].apply(lambda x: 10
|
|
93
|
+
elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
|
|
94
94
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
|
|
95
|
-
elif df[self.date_column].apply(lambda x: 10
|
|
95
|
+
elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
|
|
96
96
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
|
|
97
|
-
elif df[self.date_column].apply(lambda x: 0 < x < 10
|
|
97
|
+
elif df[self.date_column].apply(lambda x: 0 < x < 10**11).all():
|
|
98
98
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
|
|
99
99
|
else:
|
|
100
100
|
msg = self.bundle.get("unsupported_date_type").format(self.date_column)
|
|
@@ -185,10 +185,7 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
|
|
|
185
185
|
def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
|
|
186
186
|
df = df.copy()
|
|
187
187
|
seconds = "datetime_seconds"
|
|
188
|
-
|
|
189
|
-
df[date_col] = df[date_col].dt.to_timestamp()
|
|
190
|
-
else:
|
|
191
|
-
df[date_col] = pd.to_datetime(df[date_col])
|
|
188
|
+
df[date_col] = pd.to_datetime(df[date_col])
|
|
192
189
|
df[date_col] = df[date_col].dt.tz_localize(None)
|
|
193
190
|
df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
|
|
194
191
|
|
|
@@ -251,9 +248,7 @@ def validate_dates_distribution(
|
|
|
251
248
|
if col in search_keys:
|
|
252
249
|
continue
|
|
253
250
|
try:
|
|
254
|
-
if
|
|
255
|
-
pass
|
|
256
|
-
elif pd.__version__ >= "2.0.0":
|
|
251
|
+
if pd.__version__ >= "2.0.0":
|
|
257
252
|
# Format mixed to avoid massive warnings
|
|
258
253
|
pd.to_datetime(X[col], format="mixed")
|
|
259
254
|
else:
|
|
@@ -266,9 +261,7 @@ def validate_dates_distribution(
|
|
|
266
261
|
if maybe_date_col is None:
|
|
267
262
|
return
|
|
268
263
|
|
|
269
|
-
if
|
|
270
|
-
dates = X[maybe_date_col].dt.to_timestamp().dt.date
|
|
271
|
-
elif pd.__version__ >= "2.0.0":
|
|
264
|
+
if pd.__version__ >= "2.0.0":
|
|
272
265
|
dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
|
|
273
266
|
else:
|
|
274
267
|
dates = pd.to_datetime(X[maybe_date_col]).dt.date
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=SDsUMCDGdgycHBSyFToeEcF3TmvsEtCl-63CS7TpVBc,31
|
|
2
2
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
|
|
@@ -13,13 +13,13 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
|
|
|
13
13
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
14
14
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
15
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
-
upgini/autofe/all_operands.py,sha256=
|
|
16
|
+
upgini/autofe/all_operands.py,sha256=RHz7uLrcrcXeli8R4DhGZ2NhwGhNyWJhmSpFT_F3l5A,2423
|
|
17
17
|
upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
|
|
18
18
|
upgini/autofe/date.py,sha256=qzk0NT332Q0vR1eRwTuNiMSrGE3ulh6Ic3QLBZqSdvw,7284
|
|
19
19
|
upgini/autofe/feature.py,sha256=_V9B74B3ue7eAYXSOt9JKhVC9klkAKks22MwnBRye_w,12487
|
|
20
20
|
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
21
21
|
upgini/autofe/operand.py,sha256=JjEVT1U3kY9NDjUPMdoki7Oa8hMDG0-_h_NklVjIFyc,2882
|
|
22
|
-
upgini/autofe/unary.py,sha256=
|
|
22
|
+
upgini/autofe/unary.py,sha256=1EgesKM8M1Lm2Z5VrlgXj3aI0Z88hZnJDbuPaYJyyj4,3614
|
|
23
23
|
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
24
24
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
upgini/data_source/data_source_publisher.py,sha256=B4fJ1owDCF5ZZ0Ca9ywi_CXVt4iPvABh5BGTnXdXmHk,16635
|
|
@@ -41,7 +41,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
|
|
|
41
41
|
upgini/utils/country_utils.py,sha256=yE8oRgMpXuJxPfQm4fioY6dg6700HgVnHSk4Cv9sUyM,6511
|
|
42
42
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
43
43
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
44
|
-
upgini/utils/datetime_utils.py,sha256
|
|
44
|
+
upgini/utils/datetime_utils.py,sha256=-LsDTThsGKsTZ57V1uNiHtLcoTtqktk5tui4WnqggJo,10673
|
|
45
45
|
upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
|
|
46
46
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
47
47
|
upgini/utils/email_utils.py,sha256=PLufTO97Pg9PPsNqB9agcM6M98MIxKUgIgNn2mVwSQ0,3520
|
|
@@ -56,7 +56,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
56
56
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
57
57
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
58
58
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
59
|
+
upgini-1.1.290a3232.post1.dist-info/METADATA,sha256=xp8PeULHddcPJBU_-5296G5KMf3gU03lRwqlYErPD2Q,48128
|
|
60
|
+
upgini-1.1.290a3232.post1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
61
|
+
upgini-1.1.290a3232.post1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
62
|
+
upgini-1.1.290a3232.post1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|