upgini 1.1.296a3521.dev10__tar.gz → 1.1.297__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/PKG-INFO +1 -1
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/pyproject.toml +0 -1
- upgini-1.1.297/src/upgini/__about__.py +1 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/all_operands.py +1 -9
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/date.py +16 -46
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/feature.py +5 -30
- upgini-1.1.296a3521.dev10/src/upgini/__about__.py +0 -1
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/.gitignore +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/LICENSE +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/README.md +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/ads.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/dataset.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/errors.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/features_enricher.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/http.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/lazy_import.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/metadata.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/metrics.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/search_task.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/spinner.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.297"
|
|
@@ -1,14 +1,7 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
3
|
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
4
|
-
from upgini.autofe.date import (
|
|
5
|
-
DateDiff,
|
|
6
|
-
DateDiffType2,
|
|
7
|
-
DateListDiff,
|
|
8
|
-
DateListDiffBounded,
|
|
9
|
-
DatePercentile,
|
|
10
|
-
DatePercentileMethod2,
|
|
11
|
-
)
|
|
4
|
+
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
|
|
12
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
13
6
|
from upgini.autofe.operand import Operand
|
|
14
7
|
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
|
|
@@ -57,7 +50,6 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
57
50
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
58
51
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
59
52
|
DatePercentile(),
|
|
60
|
-
DatePercentileMethod2(),
|
|
61
53
|
Norm(),
|
|
62
54
|
]
|
|
63
55
|
}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import abc
|
|
2
1
|
from typing import Any, Dict, List, Optional, Union
|
|
3
2
|
|
|
4
3
|
import numpy as np
|
|
@@ -39,7 +38,6 @@ class DateDiffMixin(BaseModel):
|
|
|
39
38
|
|
|
40
39
|
class DateDiff(PandasOperand, DateDiffMixin):
|
|
41
40
|
name = "date_diff"
|
|
42
|
-
alias = "date_diff_type1"
|
|
43
41
|
is_binary = True
|
|
44
42
|
has_symmetry_importance = True
|
|
45
43
|
|
|
@@ -161,45 +159,12 @@ class DateListDiffBounded(DateListDiff):
|
|
|
161
159
|
return super()._agg(x)
|
|
162
160
|
|
|
163
161
|
|
|
164
|
-
class DatePercentileBase(PandasOperand, abc.ABC):
|
|
162
|
+
class DatePercentile(PandasOperand):
|
|
163
|
+
name = "date_per"
|
|
165
164
|
is_binary = True
|
|
166
165
|
output_type = "float"
|
|
167
166
|
|
|
168
167
|
date_unit: Optional[str] = None
|
|
169
|
-
|
|
170
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
171
|
-
# Assuming that left is a date column, right is a feature column
|
|
172
|
-
left = pd.to_datetime(left, unit=self.date_unit)
|
|
173
|
-
|
|
174
|
-
bounds = self._get_bounds(left)
|
|
175
|
-
|
|
176
|
-
return right.index.to_series().apply(lambda i: self._perc(right[i], bounds[i]))
|
|
177
|
-
|
|
178
|
-
@abc.abstractmethod
|
|
179
|
-
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
180
|
-
pass
|
|
181
|
-
|
|
182
|
-
def _perc(self, f, bounds):
|
|
183
|
-
hit = np.where(f >= bounds)[0]
|
|
184
|
-
if hit.size > 0:
|
|
185
|
-
return np.max(hit) + 1
|
|
186
|
-
else:
|
|
187
|
-
return np.nan
|
|
188
|
-
|
|
189
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
|
190
|
-
res = super().get_params()
|
|
191
|
-
res.update(
|
|
192
|
-
{
|
|
193
|
-
"date_unit": self.date_unit,
|
|
194
|
-
}
|
|
195
|
-
)
|
|
196
|
-
return res
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
class DatePercentile(DatePercentileBase):
|
|
200
|
-
name = "date_per"
|
|
201
|
-
alias = "date_per_method1"
|
|
202
|
-
|
|
203
168
|
zero_month: Optional[int]
|
|
204
169
|
zero_year: Optional[int]
|
|
205
170
|
zero_bounds: Optional[List[float]]
|
|
@@ -209,6 +174,7 @@ class DatePercentile(DatePercentileBase):
|
|
|
209
174
|
res = super().get_params()
|
|
210
175
|
res.update(
|
|
211
176
|
{
|
|
177
|
+
"date_unit": self.date_unit,
|
|
212
178
|
"zero_month": self.zero_month,
|
|
213
179
|
"zero_year": self.zero_year,
|
|
214
180
|
"zero_bounds": self.zero_bounds,
|
|
@@ -224,18 +190,22 @@ class DatePercentile(DatePercentileBase):
|
|
|
224
190
|
elif isinstance(value, str):
|
|
225
191
|
return value[1:-1].split(", ")
|
|
226
192
|
|
|
227
|
-
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
228
|
-
|
|
229
|
-
|
|
193
|
+
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
194
|
+
# Assuming that left is a date column, right is a feature column
|
|
195
|
+
left = pd.to_datetime(left, unit=self.date_unit)
|
|
196
|
+
months = left.dt.month
|
|
197
|
+
years = left.dt.year
|
|
230
198
|
|
|
231
199
|
month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
|
|
232
|
-
|
|
200
|
+
bounds = month_diffs.apply(
|
|
233
201
|
lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * self.step
|
|
234
202
|
)
|
|
235
203
|
|
|
204
|
+
return right.index.to_series().apply(lambda i: self.__perc(right[i], bounds[i]))
|
|
236
205
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
206
|
+
def __perc(self, f, bounds):
|
|
207
|
+
hit = np.where(f >= bounds)[0]
|
|
208
|
+
if hit.size > 0:
|
|
209
|
+
return np.max(hit) + 1
|
|
210
|
+
else:
|
|
211
|
+
return np.nan
|
|
@@ -16,9 +16,6 @@ class Column:
|
|
|
16
16
|
self.data = data
|
|
17
17
|
self.calculate_all = calculate_all
|
|
18
18
|
|
|
19
|
-
def get_name_component(self, **kwargs) -> str:
|
|
20
|
-
return self.name
|
|
21
|
-
|
|
22
19
|
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
23
20
|
return self.name
|
|
24
21
|
|
|
@@ -44,7 +41,7 @@ class Column:
|
|
|
44
41
|
def get_column_nodes(self) -> List["Column"]:
|
|
45
42
|
return [self]
|
|
46
43
|
|
|
47
|
-
def get_columns(self
|
|
44
|
+
def get_columns(self) -> List[str]:
|
|
48
45
|
return [self.name]
|
|
49
46
|
|
|
50
47
|
def infer_type(self, data: pd.DataFrame) -> DtypeObj:
|
|
@@ -60,12 +57,6 @@ class Column:
|
|
|
60
57
|
def to_pretty_formula(self) -> str:
|
|
61
58
|
return self.to_formula()
|
|
62
59
|
|
|
63
|
-
def __eq__(self, value: object) -> bool:
|
|
64
|
-
if not isinstance(value, Column):
|
|
65
|
-
return False
|
|
66
|
-
else:
|
|
67
|
-
return self.name == value.name and self.calculate_all == value.calculate_all
|
|
68
|
-
|
|
69
60
|
|
|
70
61
|
class Feature:
|
|
71
62
|
def __init__(
|
|
@@ -134,9 +125,6 @@ class Feature:
|
|
|
134
125
|
for child in self.children:
|
|
135
126
|
child.delete_data()
|
|
136
127
|
|
|
137
|
-
def get_op_display_name(self) -> str:
|
|
138
|
-
return self.op.alias or self.op.name.lower()
|
|
139
|
-
|
|
140
128
|
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
141
129
|
if self.cached_display_name is not None and cache:
|
|
142
130
|
return self.cached_display_name
|
|
@@ -144,11 +132,11 @@ class Feature:
|
|
|
144
132
|
if self.alias:
|
|
145
133
|
components = ["f_autofe", self.alias]
|
|
146
134
|
elif shorten and not self.op.is_unary:
|
|
147
|
-
components = ["f_autofe", self.get_op_display_name()]
|
|
135
|
+
components = ["f_autofe", self.op.alias or self.op.name.lower()]
|
|
148
136
|
else:
|
|
149
137
|
components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
|
|
150
138
|
"autofe",
|
|
151
|
-
self.get_op_display_name(),
|
|
139
|
+
self.op.alias or self.op.name.lower(),
|
|
152
140
|
]
|
|
153
141
|
components.extend([str(self.display_index)] if self.display_index is not None else [])
|
|
154
142
|
display_name = "_".join(components)
|
|
@@ -318,21 +306,8 @@ class FeatureGroup:
|
|
|
318
306
|
main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
|
|
319
307
|
if isinstance(self.op, PandasOperand):
|
|
320
308
|
columns = self.get_columns()
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
]
|
|
324
|
-
lower_order_names = [ch.get_display_name() for ch in lower_order_children]
|
|
325
|
-
if any(isinstance(f, Feature) for f in lower_order_children):
|
|
326
|
-
child_data = pd.concat(
|
|
327
|
-
[data[main_column]] + [ch.calculate(data) for ch in lower_order_children],
|
|
328
|
-
axis=1,
|
|
329
|
-
)
|
|
330
|
-
child_data.columns = [main_column] + lower_order_names
|
|
331
|
-
else:
|
|
332
|
-
child_data = data[columns]
|
|
333
|
-
|
|
334
|
-
new_data = self.op.calculate_group(child_data, main_column=main_column)
|
|
335
|
-
new_data.rename(columns=dict(zip(lower_order_names, self.get_display_names())), inplace=True)
|
|
309
|
+
new_data = self.op.calculate_group(data[columns], main_column=main_column)
|
|
310
|
+
new_data.rename(columns=dict(zip(columns, self.get_display_names())), inplace=True)
|
|
336
311
|
else:
|
|
337
312
|
raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
|
|
338
313
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.296a3521.dev10"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev10 → upgini-1.1.297}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|