upgini 1.1.296a3521.dev3__tar.gz → 1.1.296a3521.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/PKG-INFO +1 -1
- upgini-1.1.296a3521.dev5/src/upgini/__about__.py +1 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/date.py +2 -2
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/feature.py +34 -6
- upgini-1.1.296a3521.dev3/src/upgini/__about__.py +0 -1
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/.gitignore +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/LICENSE +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/README.md +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/pyproject.toml +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/ads.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/dataset.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/errors.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/features_enricher.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/http.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/lazy_import.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/metadata.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/metrics.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/search_task.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/spinner.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.296a3521.dev3
|
|
3
|
+
Version: 1.1.296a3521.dev5
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.296a3521.dev5"
|
|
@@ -172,13 +172,13 @@ class DatePercentileBase(PandasOperand, abc.ABC):
|
|
|
172
172
|
|
|
173
173
|
bounds = self._get_bounds(left)
|
|
174
174
|
|
|
175
|
-
return right.index.to_series().apply(lambda i: self.
|
|
175
|
+
return right.index.to_series().apply(lambda i: self._perc(right[i], bounds[i]))
|
|
176
176
|
|
|
177
177
|
@abc.abstractmethod
|
|
178
178
|
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
179
179
|
pass
|
|
180
180
|
|
|
181
|
-
def
|
|
181
|
+
def _perc(self, f, bounds):
|
|
182
182
|
hit = np.where(f >= bounds)[0]
|
|
183
183
|
if hit.size > 0:
|
|
184
184
|
return np.max(hit) + 1
|
|
@@ -16,6 +16,9 @@ class Column:
|
|
|
16
16
|
self.data = data
|
|
17
17
|
self.calculate_all = calculate_all
|
|
18
18
|
|
|
19
|
+
def get_name_component(self, **kwargs) -> str:
|
|
20
|
+
return self.name
|
|
21
|
+
|
|
19
22
|
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
20
23
|
return self.name
|
|
21
24
|
|
|
@@ -41,7 +44,7 @@ class Column:
|
|
|
41
44
|
def get_column_nodes(self) -> List["Column"]:
|
|
42
45
|
return [self]
|
|
43
46
|
|
|
44
|
-
def get_columns(self) -> List[str]:
|
|
47
|
+
def get_columns(self, **kwargs) -> List[str]:
|
|
45
48
|
return [self.name]
|
|
46
49
|
|
|
47
50
|
def infer_type(self, data: pd.DataFrame) -> DtypeObj:
|
|
@@ -57,6 +60,12 @@ class Column:
|
|
|
57
60
|
def to_pretty_formula(self) -> str:
|
|
58
61
|
return self.to_formula()
|
|
59
62
|
|
|
63
|
+
def __eq__(self, value: object) -> bool:
|
|
64
|
+
if not isinstance(value, Column):
|
|
65
|
+
return False
|
|
66
|
+
else:
|
|
67
|
+
return self.name == value.name and self.calculate_all == value.calculate_all
|
|
68
|
+
|
|
60
69
|
|
|
61
70
|
class Feature:
|
|
62
71
|
def __init__(
|
|
@@ -125,6 +134,12 @@ class Feature:
|
|
|
125
134
|
for child in self.children:
|
|
126
135
|
child.delete_data()
|
|
127
136
|
|
|
137
|
+
def get_name_component(self, **kwargs) -> str:
|
|
138
|
+
return "_".join(ch.get_name_component(**kwargs) for ch in self.children) + "_" + self.get_op_display_name()
|
|
139
|
+
|
|
140
|
+
def get_op_display_name(self) -> str:
|
|
141
|
+
return self.op.alias or self.op.name.lower()
|
|
142
|
+
|
|
128
143
|
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
129
144
|
if self.cached_display_name is not None and cache:
|
|
130
145
|
return self.cached_display_name
|
|
@@ -132,11 +147,12 @@ class Feature:
|
|
|
132
147
|
if self.alias:
|
|
133
148
|
components = ["f_autofe", self.alias]
|
|
134
149
|
elif shorten and not self.op.is_unary:
|
|
135
|
-
components = ["f_autofe", self.
|
|
150
|
+
components = ["f_autofe", self.get_op_display_name()]
|
|
136
151
|
else:
|
|
137
|
-
|
|
152
|
+
child_components = [ch.get_name_component(**kwargs) for ch in self.children]
|
|
153
|
+
components = ["f_" + "_f_".join(child_components)] + [
|
|
138
154
|
"autofe",
|
|
139
|
-
self.
|
|
155
|
+
self.get_op_display_name(),
|
|
140
156
|
]
|
|
141
157
|
components.extend([str(self.display_index)] if self.display_index is not None else [])
|
|
142
158
|
display_name = "_".join(components)
|
|
@@ -306,8 +322,20 @@ class FeatureGroup:
|
|
|
306
322
|
main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
|
|
307
323
|
if isinstance(self.op, PandasOperand):
|
|
308
324
|
columns = self.get_columns()
|
|
309
|
-
|
|
310
|
-
|
|
325
|
+
lower_order_children = [
|
|
326
|
+
ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
|
|
327
|
+
]
|
|
328
|
+
if any(isinstance(f, Feature) for f in lower_order_children):
|
|
329
|
+
child_data = pd.concat(
|
|
330
|
+
[data[main_column]] + [ch.calculate(data) for ch in lower_order_children], axis=1
|
|
331
|
+
)
|
|
332
|
+
else:
|
|
333
|
+
child_data = data[columns]
|
|
334
|
+
|
|
335
|
+
new_data = self.op.calculate_group(child_data, main_column=main_column)
|
|
336
|
+
new_data.rename(
|
|
337
|
+
columns=dict(zip((c for c in columns if c != main_column), self.get_display_names())), inplace=True
|
|
338
|
+
)
|
|
311
339
|
else:
|
|
312
340
|
raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
|
|
313
341
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.296a3521.dev3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/normalizer/phone_normalizer.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/__init__.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/blocked_time_series.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3521.dev3 → upgini-1.1.296a3521.dev5}/src/upgini/utils/features_validator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|