upgini 1.1.290a3232.post1__tar.gz → 1.1.291__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/PKG-INFO +1 -1
- upgini-1.1.291/src/upgini/__about__.py +1 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/all_operands.py +2 -2
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/binary.py +7 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/operand.py +1 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/unary.py +8 -7
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/datetime_utils.py +15 -8
- upgini-1.1.290a3232.post1/src/upgini/__about__.py +0 -1
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/.gitignore +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/LICENSE +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/README.md +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/pyproject.toml +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/ads.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/date.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/dataset.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/errors.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/features_enricher.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/http.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/metadata.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/metrics.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/search_task.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/spinner.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.291"
|
|
@@ -4,7 +4,7 @@ from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
|
4
4
|
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
|
|
5
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
6
6
|
from upgini.autofe.operand import Operand
|
|
7
|
-
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual,
|
|
7
|
+
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
|
|
8
8
|
from upgini.autofe.vector import Mean, Sum
|
|
9
9
|
|
|
10
10
|
ALL_OPERANDS: Dict[str, Operand] = {
|
|
@@ -50,7 +50,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
50
50
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
51
51
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
52
52
|
DatePercentile(),
|
|
53
|
-
|
|
53
|
+
Norm(),
|
|
54
54
|
]
|
|
55
55
|
}
|
|
56
56
|
|
|
@@ -9,6 +9,7 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
|
9
9
|
class Min(PandasOperand):
|
|
10
10
|
name = "min"
|
|
11
11
|
is_binary = True
|
|
12
|
+
is_symmetrical = True
|
|
12
13
|
has_symmetry_importance = True
|
|
13
14
|
|
|
14
15
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
@@ -18,6 +19,7 @@ class Min(PandasOperand):
|
|
|
18
19
|
class Max(PandasOperand):
|
|
19
20
|
name = "max"
|
|
20
21
|
is_binary = True
|
|
22
|
+
is_symmetrical = True
|
|
21
23
|
has_symmetry_importance = True
|
|
22
24
|
|
|
23
25
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
@@ -28,6 +30,7 @@ class Add(PandasOperand, VectorizableMixin):
|
|
|
28
30
|
name = "+"
|
|
29
31
|
alias = "add"
|
|
30
32
|
is_binary = True
|
|
33
|
+
is_symmetrical = True
|
|
31
34
|
has_symmetry_importance = True
|
|
32
35
|
is_vectorizable = True
|
|
33
36
|
|
|
@@ -46,6 +49,7 @@ class Subtract(PandasOperand, VectorizableMixin):
|
|
|
46
49
|
name = "-"
|
|
47
50
|
alias = "sub"
|
|
48
51
|
is_binary = True
|
|
52
|
+
is_symmetrical = True
|
|
49
53
|
has_symmetry_importance = True
|
|
50
54
|
is_vectorizable = True
|
|
51
55
|
|
|
@@ -64,6 +68,7 @@ class Multiply(PandasOperand, VectorizableMixin):
|
|
|
64
68
|
name = "*"
|
|
65
69
|
alias = "mul"
|
|
66
70
|
is_binary = True
|
|
71
|
+
is_symmetrical = True
|
|
67
72
|
has_symmetry_importance = True
|
|
68
73
|
is_vectorizable = True
|
|
69
74
|
|
|
@@ -112,6 +117,7 @@ class Combine(PandasOperand):
|
|
|
112
117
|
class CombineThenFreq(PandasOperand):
|
|
113
118
|
name = "CombineThenFreq"
|
|
114
119
|
is_binary = True
|
|
120
|
+
is_symmetrical = True
|
|
115
121
|
has_symmetry_importance = True
|
|
116
122
|
output_type = "float"
|
|
117
123
|
is_distribution_dependent = True
|
|
@@ -128,6 +134,7 @@ class Sim(PandasOperand):
|
|
|
128
134
|
name = "sim"
|
|
129
135
|
is_binary = True
|
|
130
136
|
output_type = "float"
|
|
137
|
+
is_symmetrical = True
|
|
131
138
|
has_symmetry_importance = True
|
|
132
139
|
|
|
133
140
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import pandas as pd
|
|
3
|
-
from sklearn.preprocessing import
|
|
3
|
+
from sklearn.preprocessing import Normalizer
|
|
4
4
|
|
|
5
5
|
from upgini.autofe.operand import PandasOperand, VectorizableMixin
|
|
6
6
|
|
|
@@ -114,13 +114,14 @@ class Freq(PandasOperand):
|
|
|
114
114
|
return self._loc(data, value_counts)
|
|
115
115
|
|
|
116
116
|
|
|
117
|
-
class
|
|
118
|
-
name = "
|
|
117
|
+
class Norm(PandasOperand):
|
|
118
|
+
name = "norm"
|
|
119
119
|
is_unary = True
|
|
120
120
|
output_type = "float"
|
|
121
121
|
|
|
122
122
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
123
|
+
data_dropna = data.dropna()
|
|
124
|
+
normalized_data = Normalizer().transform(data_dropna.to_frame().T).T
|
|
125
|
+
normalized_data = pd.Series(normalized_data[:, 0], index=data_dropna.index, name=data.name)
|
|
126
|
+
normalized_data = normalized_data.reindex(data.index)
|
|
127
|
+
return normalized_data
|
|
@@ -82,19 +82,19 @@ class DateTimeSearchKeyConverter:
|
|
|
82
82
|
elif isinstance(df[self.date_column].values[0], datetime.date):
|
|
83
83
|
df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
|
|
84
84
|
elif is_period_dtype(df[self.date_column]):
|
|
85
|
-
df[self.date_column] =
|
|
85
|
+
df[self.date_column] = df[self.date_column].dt.to_timestamp()
|
|
86
86
|
elif is_numeric_dtype(df[self.date_column]):
|
|
87
87
|
# 315532801 - 2524608001 - seconds
|
|
88
88
|
# 315532801000 - 2524608001000 - milliseconds
|
|
89
89
|
# 315532801000000 - 2524608001000000 - microseconds
|
|
90
90
|
# 315532801000000000 - 2524608001000000000 - nanoseconds
|
|
91
|
-
if df[self.date_column].apply(lambda x: 10**16 < x).all():
|
|
91
|
+
if df[self.date_column].apply(lambda x: 10 ** 16 < x).all():
|
|
92
92
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
|
|
93
|
-
elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
|
|
93
|
+
elif df[self.date_column].apply(lambda x: 10 ** 14 < x < 10 ** 16).all():
|
|
94
94
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
|
|
95
|
-
elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
|
|
95
|
+
elif df[self.date_column].apply(lambda x: 10 ** 11 < x < 10 ** 14).all():
|
|
96
96
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
|
|
97
|
-
elif df[self.date_column].apply(lambda x: 0 < x < 10**11).all():
|
|
97
|
+
elif df[self.date_column].apply(lambda x: 0 < x < 10 ** 11).all():
|
|
98
98
|
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
|
|
99
99
|
else:
|
|
100
100
|
msg = self.bundle.get("unsupported_date_type").format(self.date_column)
|
|
@@ -185,7 +185,10 @@ def is_time_series(df: pd.DataFrame, date_col: str) -> bool:
|
|
|
185
185
|
def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[str]) -> bool:
|
|
186
186
|
df = df.copy()
|
|
187
187
|
seconds = "datetime_seconds"
|
|
188
|
-
|
|
188
|
+
if is_period_dtype(df[date_col]):
|
|
189
|
+
df[date_col] = df[date_col].dt.to_timestamp()
|
|
190
|
+
else:
|
|
191
|
+
df[date_col] = pd.to_datetime(df[date_col])
|
|
189
192
|
df[date_col] = df[date_col].dt.tz_localize(None)
|
|
190
193
|
df[seconds] = (df[date_col] - df[date_col].dt.floor("D")).dt.seconds
|
|
191
194
|
|
|
@@ -248,7 +251,9 @@ def validate_dates_distribution(
|
|
|
248
251
|
if col in search_keys:
|
|
249
252
|
continue
|
|
250
253
|
try:
|
|
251
|
-
if pd.__version__ >= "2.0.0":
|
|
254
|
+
if is_period_dtype(X[col]):
|
|
255
|
+
pass
|
|
256
|
+
elif pd.__version__ >= "2.0.0":
|
|
252
257
|
# Format mixed to avoid massive warnings
|
|
253
258
|
pd.to_datetime(X[col], format="mixed")
|
|
254
259
|
else:
|
|
@@ -261,7 +266,9 @@ def validate_dates_distribution(
|
|
|
261
266
|
if maybe_date_col is None:
|
|
262
267
|
return
|
|
263
268
|
|
|
264
|
-
if pd.__version__ >= "2.0.0":
|
|
269
|
+
if is_period_dtype(X[maybe_date_col]):
|
|
270
|
+
dates = X[maybe_date_col].dt.to_timestamp().dt.date
|
|
271
|
+
elif pd.__version__ >= "2.0.0":
|
|
265
272
|
dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
|
|
266
273
|
else:
|
|
267
274
|
dates = pd.to_datetime(X[maybe_date_col]).dt.date
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.290a3232-1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.290a3232.post1 → upgini-1.1.291}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|