upgini 1.1.297__tar.gz → 1.1.298__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.297 → upgini-1.1.298}/PKG-INFO +3 -3
- {upgini-1.1.297 → upgini-1.1.298}/README.md +2 -2
- upgini-1.1.298/src/upgini/__about__.py +1 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/all_operands.py +9 -1
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/date.py +46 -16
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/feature.py +27 -5
- upgini-1.1.297/src/upgini/__about__.py +0 -1
- {upgini-1.1.297 → upgini-1.1.298}/.gitignore +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/LICENSE +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/pyproject.toml +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/ads.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/dataset.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/errors.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/features_enricher.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/http.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/lazy_import.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/metadata.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/metrics.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/search_task.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/spinner.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.297 → upgini-1.1.298}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.297
|
|
3
|
+
Version: 1.1.298
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -131,7 +131,7 @@ Description-Content-Type: text/markdown
|
|
|
131
131
|
|Consumer Confidence index| 44 |22|-|Monthly|date, country|No
|
|
132
132
|
|World economic indicators|191 |41|-|Monthly|date, country|No
|
|
133
133
|
|Markets data|-|17|-|Monthly|date, datetime|No
|
|
134
|
-
|World mobile & fixed broadband network coverage and
|
|
134
|
+
|World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
|
|
135
135
|
|World demographic data |90|-|2|Annual|country, postal/ZIP code|No
|
|
136
136
|
|World house prices |44|-|3|Annual|country, postal/ZIP code|No
|
|
137
137
|
|Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
|
|
@@ -840,4 +840,4 @@ Some convenient ways to start contributing are:
|
|
|
840
840
|
- [More perks for registered users](https://profile.upgini.com)
|
|
841
841
|
|
|
842
842
|
<sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
843
|
-
Please report it here</a></sup>
|
|
843
|
+
Please report it here</a></sup>
|
|
@@ -90,7 +90,7 @@
|
|
|
90
90
|
|Consumer Confidence index| 44 |22|-|Monthly|date, country|No
|
|
91
91
|
|World economic indicators|191 |41|-|Monthly|date, country|No
|
|
92
92
|
|Markets data|-|17|-|Monthly|date, datetime|No
|
|
93
|
-
|World mobile & fixed broadband network coverage and
|
|
93
|
+
|World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
|
|
94
94
|
|World demographic data |90|-|2|Annual|country, postal/ZIP code|No
|
|
95
95
|
|World house prices |44|-|3|Annual|country, postal/ZIP code|No
|
|
96
96
|
|Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
|
|
@@ -799,4 +799,4 @@ Some convenient ways to start contributing are:
|
|
|
799
799
|
- [More perks for registered users](https://profile.upgini.com)
|
|
800
800
|
|
|
801
801
|
<sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
802
|
-
Please report it here</a></sup>
|
|
802
|
+
Please report it here</a></sup>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.298"
|
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
3
|
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
4
|
-
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
|
|
4
|
+
from upgini.autofe.date import (
|
|
5
|
+
DateDiff,
|
|
6
|
+
DateDiffType2,
|
|
7
|
+
DateListDiff,
|
|
8
|
+
DateListDiffBounded,
|
|
9
|
+
DatePercentile,
|
|
10
|
+
DatePercentileMethod2,
|
|
11
|
+
)
|
|
5
12
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
6
13
|
from upgini.autofe.operand import Operand
|
|
7
14
|
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
|
|
@@ -50,6 +57,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
50
57
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
51
58
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
52
59
|
DatePercentile(),
|
|
60
|
+
DatePercentileMethod2(),
|
|
53
61
|
Norm(),
|
|
54
62
|
]
|
|
55
63
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import abc
|
|
1
2
|
from typing import Any, Dict, List, Optional, Union
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
@@ -38,6 +39,7 @@ class DateDiffMixin(BaseModel):
|
|
|
38
39
|
|
|
39
40
|
class DateDiff(PandasOperand, DateDiffMixin):
|
|
40
41
|
name = "date_diff"
|
|
42
|
+
alias = "date_diff_type1"
|
|
41
43
|
is_binary = True
|
|
42
44
|
has_symmetry_importance = True
|
|
43
45
|
|
|
@@ -159,12 +161,45 @@ class DateListDiffBounded(DateListDiff):
|
|
|
159
161
|
return super()._agg(x)
|
|
160
162
|
|
|
161
163
|
|
|
162
|
-
class DatePercentile(PandasOperand):
|
|
163
|
-
name = "date_per"
|
|
164
|
+
class DatePercentileBase(PandasOperand, abc.ABC):
|
|
164
165
|
is_binary = True
|
|
165
166
|
output_type = "float"
|
|
166
167
|
|
|
167
168
|
date_unit: Optional[str] = None
|
|
169
|
+
|
|
170
|
+
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
171
|
+
# Assuming that left is a date column, right is a feature column
|
|
172
|
+
left = pd.to_datetime(left, unit=self.date_unit)
|
|
173
|
+
|
|
174
|
+
bounds = self._get_bounds(left)
|
|
175
|
+
|
|
176
|
+
return right.index.to_series().apply(lambda i: self._perc(right[i], bounds[i]))
|
|
177
|
+
|
|
178
|
+
@abc.abstractmethod
|
|
179
|
+
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
180
|
+
pass
|
|
181
|
+
|
|
182
|
+
def _perc(self, f, bounds):
|
|
183
|
+
hit = np.where(f >= bounds)[0]
|
|
184
|
+
if hit.size > 0:
|
|
185
|
+
return np.max(hit) + 1
|
|
186
|
+
else:
|
|
187
|
+
return np.nan
|
|
188
|
+
|
|
189
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
190
|
+
res = super().get_params()
|
|
191
|
+
res.update(
|
|
192
|
+
{
|
|
193
|
+
"date_unit": self.date_unit,
|
|
194
|
+
}
|
|
195
|
+
)
|
|
196
|
+
return res
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class DatePercentile(DatePercentileBase):
|
|
200
|
+
name = "date_per"
|
|
201
|
+
alias = "date_per_method1"
|
|
202
|
+
|
|
168
203
|
zero_month: Optional[int]
|
|
169
204
|
zero_year: Optional[int]
|
|
170
205
|
zero_bounds: Optional[List[float]]
|
|
@@ -174,7 +209,6 @@ class DatePercentile(PandasOperand):
|
|
|
174
209
|
res = super().get_params()
|
|
175
210
|
res.update(
|
|
176
211
|
{
|
|
177
|
-
"date_unit": self.date_unit,
|
|
178
212
|
"zero_month": self.zero_month,
|
|
179
213
|
"zero_year": self.zero_year,
|
|
180
214
|
"zero_bounds": self.zero_bounds,
|
|
@@ -190,22 +224,18 @@ class DatePercentile(PandasOperand):
|
|
|
190
224
|
elif isinstance(value, str):
|
|
191
225
|
return value[1:-1].split(", ")
|
|
192
226
|
|
|
193
|
-
def
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
months = left.dt.month
|
|
197
|
-
years = left.dt.year
|
|
227
|
+
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
228
|
+
months = date_col.dt.month
|
|
229
|
+
years = date_col.dt.year
|
|
198
230
|
|
|
199
231
|
month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
|
|
200
|
-
|
|
232
|
+
return month_diffs.apply(
|
|
201
233
|
lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * self.step
|
|
202
234
|
)
|
|
203
235
|
|
|
204
|
-
return right.index.to_series().apply(lambda i: self.__perc(right[i], bounds[i]))
|
|
205
236
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
return np.nan
|
|
237
|
+
class DatePercentileMethod2(DatePercentileBase):
|
|
238
|
+
name = "date_per_method2"
|
|
239
|
+
|
|
240
|
+
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
241
|
+
pass
|
|
@@ -41,7 +41,7 @@ class Column:
|
|
|
41
41
|
def get_column_nodes(self) -> List["Column"]:
|
|
42
42
|
return [self]
|
|
43
43
|
|
|
44
|
-
def get_columns(self) -> List[str]:
|
|
44
|
+
def get_columns(self, **kwargs) -> List[str]:
|
|
45
45
|
return [self.name]
|
|
46
46
|
|
|
47
47
|
def infer_type(self, data: pd.DataFrame) -> DtypeObj:
|
|
@@ -57,6 +57,12 @@ class Column:
|
|
|
57
57
|
def to_pretty_formula(self) -> str:
|
|
58
58
|
return self.to_formula()
|
|
59
59
|
|
|
60
|
+
def __eq__(self, value: object) -> bool:
|
|
61
|
+
if not isinstance(value, Column):
|
|
62
|
+
return False
|
|
63
|
+
else:
|
|
64
|
+
return self.name == value.name and self.calculate_all == value.calculate_all
|
|
65
|
+
|
|
60
66
|
|
|
61
67
|
class Feature:
|
|
62
68
|
def __init__(
|
|
@@ -125,6 +131,9 @@ class Feature:
|
|
|
125
131
|
for child in self.children:
|
|
126
132
|
child.delete_data()
|
|
127
133
|
|
|
134
|
+
def get_op_display_name(self) -> str:
|
|
135
|
+
return self.op.alias or self.op.name.lower()
|
|
136
|
+
|
|
128
137
|
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
129
138
|
if self.cached_display_name is not None and cache:
|
|
130
139
|
return self.cached_display_name
|
|
@@ -132,11 +141,11 @@ class Feature:
|
|
|
132
141
|
if self.alias:
|
|
133
142
|
components = ["f_autofe", self.alias]
|
|
134
143
|
elif shorten and not self.op.is_unary:
|
|
135
|
-
components = ["f_autofe", self.
|
|
144
|
+
components = ["f_autofe", self.get_op_display_name()]
|
|
136
145
|
else:
|
|
137
146
|
components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
|
|
138
147
|
"autofe",
|
|
139
|
-
self.
|
|
148
|
+
self.get_op_display_name(),
|
|
140
149
|
]
|
|
141
150
|
components.extend([str(self.display_index)] if self.display_index is not None else [])
|
|
142
151
|
display_name = "_".join(components)
|
|
@@ -306,8 +315,21 @@ class FeatureGroup:
|
|
|
306
315
|
main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
|
|
307
316
|
if isinstance(self.op, PandasOperand):
|
|
308
317
|
columns = self.get_columns()
|
|
309
|
-
|
|
310
|
-
|
|
318
|
+
lower_order_children = [
|
|
319
|
+
ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
|
|
320
|
+
]
|
|
321
|
+
lower_order_names = [ch.get_display_name() for ch in lower_order_children]
|
|
322
|
+
if any(isinstance(f, Feature) for f in lower_order_children):
|
|
323
|
+
child_data = pd.concat(
|
|
324
|
+
[data[main_column]] + [ch.calculate(data) for ch in lower_order_children],
|
|
325
|
+
axis=1,
|
|
326
|
+
)
|
|
327
|
+
child_data.columns = [main_column] + lower_order_names
|
|
328
|
+
else:
|
|
329
|
+
child_data = data[columns]
|
|
330
|
+
|
|
331
|
+
new_data = self.op.calculate_group(child_data, main_column=main_column)
|
|
332
|
+
new_data.rename(columns=dict(zip(lower_order_names, self.get_display_names())), inplace=True)
|
|
311
333
|
else:
|
|
312
334
|
raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
|
|
313
335
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.297"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|