upgini 1.2.74__py3-none-any.whl → 1.2.74a3818.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/binary.py +2 -2
- upgini/autofe/date.py +4 -20
- upgini/autofe/feature.py +10 -20
- upgini/autofe/unary.py +1 -38
- upgini/autofe/vector.py +0 -9
- upgini/metrics.py +1 -1
- {upgini-1.2.74.dist-info → upgini-1.2.74a3818.dev1.dist-info}/METADATA +1 -1
- {upgini-1.2.74.dist-info → upgini-1.2.74a3818.dev1.dist-info}/RECORD +11 -11
- {upgini-1.2.74.dist-info → upgini-1.2.74a3818.dev1.dist-info}/WHEEL +0 -0
- {upgini-1.2.74.dist-info → upgini-1.2.74a3818.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.74"
|
1
|
+
__version__ = "1.2.74a3818.dev1"
|
upgini/autofe/binary.py
CHANGED
@@ -146,8 +146,8 @@ class Distance(PandasOperator):
|
|
146
146
|
|
147
147
|
# row-wise dot product, handling None values
|
148
148
|
def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
149
|
-
left = left.apply(lambda x: np.array(x))
|
150
|
-
right = right.apply(lambda x: np.array(x))
|
149
|
+
left = left.apply(lambda x: np.array(x).astype(np.float64))
|
150
|
+
right = right.apply(lambda x: np.array(x).astype(np.float64))
|
151
151
|
res = (left.dropna() * right.dropna()).apply(np.sum)
|
152
152
|
res = res.reindex(left.index.union(right.index))
|
153
153
|
return res
|
upgini/autofe/date.py
CHANGED
@@ -187,21 +187,16 @@ class DateListDiff(PandasOperator, DateDiffMixin, ParametrizedOperator):
|
|
187
187
|
class DateListDiffBounded(DateListDiff, ParametrizedOperator):
|
188
188
|
lower_bound: Optional[int] = None
|
189
189
|
upper_bound: Optional[int] = None
|
190
|
-
normalize: Optional[bool] = None
|
191
190
|
|
192
191
|
def to_formula(self) -> str:
|
193
192
|
lower_bound = "minusinf" if self.lower_bound is None else self.lower_bound
|
194
193
|
upper_bound = "plusinf" if self.upper_bound is None else self.upper_bound
|
195
|
-
|
196
|
-
return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}{norm}"
|
194
|
+
return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}"
|
197
195
|
|
198
196
|
@classmethod
|
199
197
|
def from_formula(cls, formula: str) -> Optional["DateListDiffBounded"]:
|
200
198
|
import re
|
201
199
|
|
202
|
-
normalize = formula.endswith("_norm")
|
203
|
-
formula = formula.replace("_norm", "")
|
204
|
-
|
205
200
|
pattern = r"^date_diff_([^_]+)_((minusinf|\d+))_((plusinf|\d+))_(\w+)$"
|
206
201
|
match = re.match(pattern, formula)
|
207
202
|
|
@@ -212,13 +207,8 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
|
|
212
207
|
lower_bound = None if match.group(2) == "minusinf" else int(match.group(2))
|
213
208
|
upper_bound = None if match.group(4) == "plusinf" else int(match.group(4))
|
214
209
|
aggregation = match.group(6)
|
215
|
-
|
216
|
-
|
217
|
-
lower_bound=lower_bound,
|
218
|
-
upper_bound=upper_bound,
|
219
|
-
aggregation=aggregation,
|
220
|
-
normalize=normalize,
|
221
|
-
)
|
210
|
+
|
211
|
+
return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
|
222
212
|
|
223
213
|
def get_params(self) -> Dict[str, Optional[str]]:
|
224
214
|
res = super().get_params()
|
@@ -226,20 +216,14 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
|
|
226
216
|
res["lower_bound"] = str(self.lower_bound)
|
227
217
|
if self.upper_bound is not None:
|
228
218
|
res["upper_bound"] = str(self.upper_bound)
|
229
|
-
if self.normalize is not None:
|
230
|
-
res["normalize"] = str(self.normalize)
|
231
219
|
return res
|
232
220
|
|
233
221
|
def _agg(self, x):
|
234
|
-
orig_len = len(x)
|
235
222
|
x = x[
|
236
223
|
(x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
|
237
224
|
& (x < (self.upper_bound if self.upper_bound is not None else np.inf))
|
238
225
|
]
|
239
|
-
|
240
|
-
if self.normalize and orig_len > 0:
|
241
|
-
return agg_res / orig_len
|
242
|
-
return agg_res
|
226
|
+
return super()._agg(x)
|
243
227
|
|
244
228
|
|
245
229
|
class DatePercentileBase(PandasOperator, abc.ABC):
|
upgini/autofe/feature.py
CHANGED
@@ -154,34 +154,24 @@ class Feature:
|
|
154
154
|
for child in self.children:
|
155
155
|
child.delete_data()
|
156
156
|
|
157
|
-
def get_op_display_name(self
|
158
|
-
return (self.op.alias or self.op.to_formula()).lower()
|
157
|
+
def get_op_display_name(self) -> str:
|
158
|
+
return (self.op.alias or self.op.to_formula()).lower()
|
159
159
|
|
160
|
-
def get_display_name(self, cache: bool = True, shorten: bool = False,
|
160
|
+
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
161
161
|
if self.cached_display_name is not None and cache:
|
162
162
|
return self.cached_display_name
|
163
163
|
|
164
164
|
should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
|
165
|
-
|
165
|
+
prev_name = [self.children[0].get_op_display_name()] if should_stack_op else []
|
166
166
|
|
167
167
|
if self.alias:
|
168
|
-
components
|
169
|
-
elif should_stack_op:
|
170
|
-
components.
|
171
|
-
[
|
172
|
-
self.children[0].get_display_name(
|
173
|
-
cache=cache, shorten=shorten, use_op_alias=use_op_alias, **kwargs
|
174
|
-
),
|
175
|
-
self.get_op_display_name(use_alias=use_op_alias),
|
176
|
-
]
|
177
|
-
)
|
178
|
-
elif shorten and not self.op.is_unary:
|
179
|
-
components.extend(["f_autofe", self.get_op_display_name(use_alias=use_op_alias)])
|
168
|
+
components = ["f_autofe", self.alias]
|
169
|
+
elif shorten and (not self.op.is_unary or should_stack_op):
|
170
|
+
components = ["f_autofe"] + prev_name + [self.get_op_display_name()]
|
180
171
|
else:
|
181
|
-
components =
|
182
|
-
"autofe"
|
183
|
-
|
184
|
-
]
|
172
|
+
components = (
|
173
|
+
["f_" + "_f_".join(self.get_columns(**kwargs))] + ["autofe"] + prev_name + [self.get_op_display_name()]
|
174
|
+
)
|
185
175
|
components.extend([str(self.display_index)] if self.display_index is not None else [])
|
186
176
|
display_name = "_".join(components)
|
187
177
|
|
upgini/autofe/unary.py
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
import
|
2
|
-
from typing import Dict, List, Optional
|
1
|
+
from typing import Dict, Optional
|
3
2
|
import numpy as np
|
4
3
|
import pandas as pd
|
5
4
|
|
6
5
|
from upgini.autofe.operator import PandasOperator, VectorizableMixin
|
7
|
-
from upgini.autofe.utils import pydantic_validator
|
8
6
|
|
9
7
|
|
10
8
|
class Abs(PandasOperator, VectorizableMixin):
|
@@ -155,38 +153,3 @@ class Embeddings(PandasOperator):
|
|
155
153
|
is_unary: bool = True
|
156
154
|
input_type: Optional[str] = "string"
|
157
155
|
output_type: Optional[str] = "vector"
|
158
|
-
|
159
|
-
|
160
|
-
class Bin(PandasOperator):
|
161
|
-
name: str = "bin"
|
162
|
-
is_unary: bool = True
|
163
|
-
output_type: Optional[str] = "category"
|
164
|
-
bin_bounds: List[int] = []
|
165
|
-
is_categorical: bool = True
|
166
|
-
|
167
|
-
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
168
|
-
return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype("category")
|
169
|
-
|
170
|
-
def _bin(self, f, bounds):
|
171
|
-
if f is None or np.isnan(f):
|
172
|
-
return np.nan
|
173
|
-
hit = np.where(f >= np.array(bounds))[0]
|
174
|
-
if hit.size > 0:
|
175
|
-
return np.max(hit) + 1
|
176
|
-
else:
|
177
|
-
return np.nan
|
178
|
-
|
179
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
180
|
-
res = super().get_params()
|
181
|
-
res.update(
|
182
|
-
{
|
183
|
-
"bin_bounds": json.dumps(self.bin_bounds),
|
184
|
-
}
|
185
|
-
)
|
186
|
-
return res
|
187
|
-
|
188
|
-
@pydantic_validator("bin_bounds", mode="before")
|
189
|
-
def parse_bin_bounds(cls, value):
|
190
|
-
if isinstance(value, str):
|
191
|
-
return json.loads(value)
|
192
|
-
return value
|
upgini/autofe/vector.py
CHANGED
@@ -22,12 +22,3 @@ class Sum(PandasOperator, VectorizableMixin):
|
|
22
22
|
|
23
23
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
24
24
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
25
|
-
|
26
|
-
|
27
|
-
class Vectorize(PandasOperator, VectorizableMixin):
|
28
|
-
name: str = "vectorize"
|
29
|
-
is_vector: bool = True
|
30
|
-
group_index: int = 0
|
31
|
-
|
32
|
-
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
33
|
-
return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
|
upgini/metrics.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=OJAwNDxLBKa-uno0o9LM57LzTEqZIlSuf5VB46GJths,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
@@ -6,7 +6,7 @@ upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
6
|
upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
|
7
7
|
upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=UNNA3H7wWATq-lTb9BChDdFc14MOYH9FTWY2Te4OU2o,39024
|
10
10
|
upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -14,14 +14,14 @@ upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9Jvf
|
|
14
14
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
15
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
|
17
|
-
upgini/autofe/binary.py,sha256=
|
18
|
-
upgini/autofe/date.py,sha256=
|
19
|
-
upgini/autofe/feature.py,sha256=
|
17
|
+
upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
|
18
|
+
upgini/autofe/date.py,sha256=C86F7sPiscUGq2a45UtQA9ADWBWg0kt54mePHHzjbLE,10633
|
19
|
+
upgini/autofe/feature.py,sha256=y1x3wijhTVBmloayQAHiscqKU9Ll8kLcGm1PdvS357I,14910
|
20
20
|
upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
|
21
21
|
upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
|
22
|
-
upgini/autofe/unary.py,sha256=
|
22
|
+
upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
|
23
23
|
upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
|
24
|
-
upgini/autofe/vector.py,sha256=
|
24
|
+
upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
|
25
25
|
upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
|
26
26
|
upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
|
27
27
|
upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.
|
75
|
-
upgini-1.2.
|
76
|
-
upgini-1.2.
|
73
|
+
upgini-1.2.74a3818.dev1.dist-info/METADATA,sha256=J-UotQ8AHXGCacs9yJb2tQ0s_1Tm89spF2wZgUyysL4,49101
|
74
|
+
upgini-1.2.74a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
75
|
+
upgini-1.2.74a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.74a3818.dev1.dist-info/RECORD,,
|
File without changes
|
File without changes
|