upgini 1.1.296a3511.dev3__tar.gz → 1.1.296a3511.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/PKG-INFO +1 -1
- upgini-1.1.296a3511.dev5/src/upgini/__about__.py +1 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/binary.py +11 -1
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/date.py +12 -2
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/feature.py +7 -3
- upgini-1.1.296a3511.dev3/src/upgini/__about__.py +0 -1
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/.gitignore +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/LICENSE +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/README.md +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/pyproject.toml +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/ads.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/dataset.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/errors.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/features_enricher.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/http.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/lazy_import.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/metadata.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/metrics.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/search_task.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/spinner.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.296a3511.dev3
|
|
3
|
+
Version: 1.1.296a3511.dev5
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.296a3511.dev5"
|
|
@@ -142,9 +142,16 @@ class Distance(PandasOperand):
|
|
|
142
142
|
has_symmetry_importance = True
|
|
143
143
|
|
|
144
144
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
145
|
-
return
|
|
145
|
+
return pd.Series(
|
|
146
|
+
1 - self.__dot(left, right) / (self.__dot(left, left) * self.__dot(right, right)), index=left.index
|
|
147
|
+
)
|
|
146
148
|
|
|
149
|
+
# row-wise dot product
|
|
150
|
+
def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
151
|
+
return (left * right).apply(np.sum)
|
|
147
152
|
|
|
153
|
+
|
|
154
|
+
# Left for backward compatibility
|
|
148
155
|
class Sim(Distance):
|
|
149
156
|
name = "sim"
|
|
150
157
|
is_binary = True
|
|
@@ -152,6 +159,9 @@ class Sim(Distance):
|
|
|
152
159
|
is_symmetrical = True
|
|
153
160
|
has_symmetry_importance = True
|
|
154
161
|
|
|
162
|
+
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
163
|
+
return 1 - super().calculate_binary(left, right)
|
|
164
|
+
|
|
155
165
|
|
|
156
166
|
class StringSim(PandasOperand, abc.ABC):
|
|
157
167
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
@@ -41,6 +41,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
41
41
|
is_binary = True
|
|
42
42
|
has_symmetry_importance = True
|
|
43
43
|
|
|
44
|
+
replace_negative: bool = False
|
|
45
|
+
|
|
44
46
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
45
47
|
res = super().get_params()
|
|
46
48
|
res.update(
|
|
@@ -48,6 +50,7 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
48
50
|
"diff_unit": self.diff_unit,
|
|
49
51
|
"left_unit": self.left_unit,
|
|
50
52
|
"right_unit": self.right_unit,
|
|
53
|
+
"replace_negative": self.replace_negative,
|
|
51
54
|
}
|
|
52
55
|
)
|
|
53
56
|
return res
|
|
@@ -59,7 +62,8 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
59
62
|
return self.__replace_negative(diff)
|
|
60
63
|
|
|
61
64
|
def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
|
|
62
|
-
|
|
65
|
+
if self.replace_negative:
|
|
66
|
+
x[x < 0] = None
|
|
63
67
|
return x
|
|
64
68
|
|
|
65
69
|
|
|
@@ -99,13 +103,19 @@ _ext_aggregations = {"nunique": (lambda x: len(np.unique(x)), 0), "count": (len,
|
|
|
99
103
|
class DateListDiff(PandasOperand, DateDiffMixin):
|
|
100
104
|
is_binary = True
|
|
101
105
|
has_symmetry_importance = True
|
|
106
|
+
|
|
102
107
|
aggregation: str
|
|
108
|
+
replace_negative: bool = False
|
|
103
109
|
|
|
104
110
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
105
111
|
res = super().get_params()
|
|
106
112
|
res.update(
|
|
107
113
|
{
|
|
108
114
|
"aggregation": self.aggregation,
|
|
115
|
+
"diff_unit": self.diff_unit,
|
|
116
|
+
"left_unit": self.left_unit,
|
|
117
|
+
"right_unit": self.right_unit,
|
|
118
|
+
"replace_negative": self.replace_negative,
|
|
109
119
|
}
|
|
110
120
|
)
|
|
111
121
|
return res
|
|
@@ -123,7 +133,7 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
123
133
|
|
|
124
134
|
def _diff(self, x: TimedeltaArray):
|
|
125
135
|
x = self._convert_diff_to_unit(x)
|
|
126
|
-
return x[x > 0]
|
|
136
|
+
return x[x > 0] if self.replace_negative else x
|
|
127
137
|
|
|
128
138
|
def _agg(self, x):
|
|
129
139
|
method = getattr(np, self.aggregation, None)
|
|
@@ -125,18 +125,22 @@ class Feature:
|
|
|
125
125
|
for child in self.children:
|
|
126
126
|
child.delete_data()
|
|
127
127
|
|
|
128
|
+
def get_op_display_name(self) -> str:
|
|
129
|
+
return self.op.alias or self.op.name.lower()
|
|
130
|
+
|
|
128
131
|
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
129
132
|
if self.cached_display_name is not None and cache:
|
|
130
133
|
return self.cached_display_name
|
|
131
134
|
|
|
132
135
|
if self.alias:
|
|
133
136
|
components = ["f_autofe", self.alias]
|
|
134
|
-
elif shorten and not self.op.is_unary:
|
|
135
|
-
|
|
137
|
+
elif shorten and not (self.op.is_unary and all(isinstance(c, Column) for c in self.children)):
|
|
138
|
+
prev_name = [self.children[0].get_op_display_name()] if self.op.is_unary else []
|
|
139
|
+
components = ["f_autofe"] + prev_name + [self.get_op_display_name()]
|
|
136
140
|
else:
|
|
137
141
|
components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
|
|
138
142
|
"autofe",
|
|
139
|
-
self.
|
|
143
|
+
self.get_op_display_name(),
|
|
140
144
|
]
|
|
141
145
|
components.extend([str(self.display_index)] if self.display_index is not None else [])
|
|
142
146
|
display_name = "_".join(components)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.296a3511.dev3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/normalizer/phone_normalizer.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/__init__.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/blocked_time_series.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
{upgini-1.1.296a3511.dev3 → upgini-1.1.296a3511.dev5}/src/upgini/utils/features_validator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|