upgini 1.2.56a3675.dev1__py3-none-any.whl → 1.2.56a3818.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/date.py +0 -8
- upgini/autofe/feature.py +1 -10
- upgini/autofe/vector.py +18 -2
- upgini/features_enricher.py +1 -1
- {upgini-1.2.56a3675.dev1.dist-info → upgini-1.2.56a3818.dev1.dist-info}/METADATA +1 -1
- {upgini-1.2.56a3675.dev1.dist-info → upgini-1.2.56a3818.dev1.dist-info}/RECORD +9 -9
- {upgini-1.2.56a3675.dev1.dist-info → upgini-1.2.56a3818.dev1.dist-info}/WHEEL +0 -0
- {upgini-1.2.56a3675.dev1.dist-info → upgini-1.2.56a3818.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.56a3675.dev1"
|
|
1
|
+
__version__ = "1.2.56a3818.dev1"
|
upgini/autofe/date.py
CHANGED
|
@@ -64,9 +64,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
64
64
|
return res
|
|
65
65
|
|
|
66
66
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
67
|
-
if left.isna().all() or right.isna().all():
|
|
68
|
-
return pd.Series([None] * len(left))
|
|
69
|
-
|
|
70
67
|
left = self._convert_to_date(left, self.left_unit)
|
|
71
68
|
right = self._convert_to_date(right, self.right_unit)
|
|
72
69
|
diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
|
|
@@ -145,9 +142,6 @@ class DateListDiff(PandasOperand, DateDiffMixin, ParametrizedOperand):
|
|
|
145
142
|
return cls(aggregation=aggregation)
|
|
146
143
|
|
|
147
144
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
148
|
-
if left.isna().all() or right.isna().all():
|
|
149
|
-
return pd.Series([None] * len(left), dtype=np.float64)
|
|
150
|
-
|
|
151
145
|
left = self._convert_to_date(left, self.left_unit)
|
|
152
146
|
right_mask = right.apply(lambda x: len(x) > 0)
|
|
153
147
|
mask = left.notna() & right.notna() & right_mask
|
|
@@ -236,8 +230,6 @@ class DatePercentileBase(PandasOperand, abc.ABC):
|
|
|
236
230
|
pass
|
|
237
231
|
|
|
238
232
|
def _perc(self, f, bounds):
|
|
239
|
-
if f is None or np.isnan(f):
|
|
240
|
-
return np.nan
|
|
241
233
|
hit = np.where(f >= np.array(bounds))[0]
|
|
242
234
|
if hit.size > 0:
|
|
243
235
|
return np.max(hit) + 1
|
upgini/autofe/feature.py
CHANGED
|
@@ -26,18 +26,9 @@ class Column:
|
|
|
26
26
|
return dict()
|
|
27
27
|
|
|
28
28
|
def rename_columns(self, mapping: Dict[str, str]) -> "Column":
|
|
29
|
-
self.name = mapping.get(self.name) or self.name
|
|
29
|
+
self.name = self._unhash(mapping.get(self.name) or self.name)
|
|
30
30
|
return self
|
|
31
31
|
|
|
32
|
-
def _unhash(self, feature_name: str) -> str:
|
|
33
|
-
last_component_idx = feature_name.rfind("_")
|
|
34
|
-
if not feature_name.startswith("f_"):
|
|
35
|
-
return feature_name # etalon feature
|
|
36
|
-
elif last_component_idx == 1:
|
|
37
|
-
return feature_name[2:] # fully hashed name, cannot unhash
|
|
38
|
-
else:
|
|
39
|
-
return feature_name[2:last_component_idx]
|
|
40
|
-
|
|
41
32
|
def delete_data(self):
|
|
42
33
|
self.data = None
|
|
43
34
|
|
upgini/autofe/vector.py
CHANGED
|
@@ -2,7 +2,11 @@ import abc
|
|
|
2
2
|
from typing import Dict, List, Optional
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from pydantic import field_validator as validator # V2
|
|
8
|
+
except ImportError:
|
|
9
|
+
from pydantic import validator # V1
|
|
6
10
|
|
|
7
11
|
from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
|
|
8
12
|
|
|
@@ -29,12 +33,16 @@ class Sum(PandasOperand, VectorizableMixin):
|
|
|
29
33
|
class TimeSeriesBase(PandasOperand, abc.ABC):
|
|
30
34
|
is_vector: bool = True
|
|
31
35
|
date_unit: Optional[str] = None
|
|
36
|
+
offset_size: int = 0
|
|
37
|
+
offset_unit: str = "D"
|
|
32
38
|
|
|
33
39
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
34
40
|
res = super().get_params()
|
|
35
41
|
res.update(
|
|
36
42
|
{
|
|
37
43
|
"date_unit": self.date_unit,
|
|
44
|
+
"offset_size": self.offset_size,
|
|
45
|
+
"offset_unit": self.offset_unit,
|
|
38
46
|
}
|
|
39
47
|
)
|
|
40
48
|
return res
|
|
@@ -46,6 +54,13 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
|
|
|
46
54
|
ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
|
|
47
55
|
ts.set_index(date.name, inplace=True)
|
|
48
56
|
ts = ts[ts.index.notna()].sort_index()
|
|
57
|
+
if self.offset_size > 0:
|
|
58
|
+
ts = ts.iloc[:, :-1].merge(
|
|
59
|
+
ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
|
|
60
|
+
left_index=True,
|
|
61
|
+
right_index=True,
|
|
62
|
+
how="left",
|
|
63
|
+
)
|
|
49
64
|
ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
|
|
50
65
|
ts = self._aggregate(ts)
|
|
51
66
|
ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
|
|
@@ -67,6 +82,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
|
67
82
|
window_unit: str = "D"
|
|
68
83
|
|
|
69
84
|
@validator("window_unit")
|
|
85
|
+
@classmethod
|
|
70
86
|
def validate_window_unit(cls, v: str) -> str:
|
|
71
87
|
try:
|
|
72
88
|
pd.tseries.frequencies.to_offset(v)
|
|
@@ -107,7 +123,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
|
107
123
|
return res
|
|
108
124
|
|
|
109
125
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
110
|
-
return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=
|
|
126
|
+
return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=1).agg(
|
|
111
127
|
_roll_aggregations.get(self.aggregation, self.aggregation)
|
|
112
128
|
)
|
|
113
129
|
|
upgini/features_enricher.py
CHANGED
|
@@ -3733,7 +3733,7 @@ if response.status_code == 200:
|
|
|
3733
3733
|
features_info_without_links = []
|
|
3734
3734
|
internal_features_info = []
|
|
3735
3735
|
|
|
3736
|
-
original_shaps = {fm.name: fm.shap_value for fm in features_meta}
|
|
3736
|
+
original_shaps = {original_names_dict.get(fm.name, fm.name): fm.shap_value for fm in features_meta}
|
|
3737
3737
|
|
|
3738
3738
|
if updated_shaps is not None:
|
|
3739
3739
|
for fm in features_meta:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.56a3675.dev1
|
|
3
|
+
Version: 1.2.56a3818.dev1
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=9Q-YAgHCpn_n0_PUmuZMMX9GDhEwIxeK81U6HvLvWLA,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=FkAKQV_XOXTobwOXpdy9BPfRkL4fkgoNa2B6NniiCrs,201554
|
|
7
7
|
upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
|
|
@@ -16,12 +16,12 @@ upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo
|
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
upgini/autofe/all_operands.py,sha256=v0_NozalvvzeojSAA0d7UJ5INS654ZVaLn4S8djK6Ac,329
|
|
18
18
|
upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
20
|
-
upgini/autofe/feature.py,sha256=
|
|
19
|
+
upgini/autofe/date.py,sha256=d-sijAD7dETfqIOCaZh1vhuVjsS_nqa-6dhjwkCdny4,10441
|
|
20
|
+
upgini/autofe/feature.py,sha256=WFob1r-E5s1ano_ogzUZ9xnMjTBN26wGv9lcOX8XghI,14763
|
|
21
21
|
upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
|
|
22
22
|
upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
|
|
23
23
|
upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
|
|
24
|
-
upgini/autofe/vector.py,sha256=
|
|
24
|
+
upgini/autofe/vector.py,sha256=lndD4YOY0pO2va6X-2akITS2VD3pZ1dFpwiAEDMtBOc,4872
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.56a3675.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
-
upgini-1.2.56a3675.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
-
upgini-1.2.56a3675.dev1.dist-info/RECORD,,
|
|
62
|
+
upgini-1.2.56a3818.dev1.dist-info/METADATA,sha256=R_ooFZF6RCW80rkva4Divg2KGhYXmSo1J6kKrNVzUWg,49065
|
|
63
|
+
upgini-1.2.56a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.56a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.56a3818.dev1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|