upgini 1.2.56a3818.dev1__py3-none-any.whl → 1.2.56a3818.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/vector.py +71 -10
- {upgini-1.2.56a3818.dev1.dist-info → upgini-1.2.56a3818.dev2.dist-info}/METADATA +1 -1
- {upgini-1.2.56a3818.dev1.dist-info → upgini-1.2.56a3818.dev2.dist-info}/RECORD +6 -6
- {upgini-1.2.56a3818.dev1.dist-info → upgini-1.2.56a3818.dev2.dist-info}/WHEEL +0 -0
- {upgini-1.2.56a3818.dev1.dist-info → upgini-1.2.56a3818.dev2.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.56a3818.dev1"
|
|
1
|
+
__version__ = "1.2.56a3818.dev2"
|
upgini/autofe/vector.py
CHANGED
|
@@ -54,20 +54,31 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
|
|
|
54
54
|
ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
|
|
55
55
|
ts.set_index(date.name, inplace=True)
|
|
56
56
|
ts = ts[ts.index.notna()].sort_index()
|
|
57
|
-
|
|
58
|
-
ts
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
)
|
|
64
|
-
|
|
57
|
+
ts = (
|
|
58
|
+
ts.groupby([c.name for c in data[1:-1]])
|
|
59
|
+
.apply(self._shift)[data[-1].name]
|
|
60
|
+
.to_frame()
|
|
61
|
+
.reset_index()
|
|
62
|
+
.set_index(date.name)
|
|
63
|
+
.groupby([c.name for c in data[1:-1]])
|
|
64
|
+
if len(data) > 2
|
|
65
|
+
else self._shift(ts)
|
|
66
|
+
)
|
|
65
67
|
ts = self._aggregate(ts)
|
|
66
68
|
ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
|
|
67
69
|
ts.index = date.index
|
|
68
70
|
|
|
69
71
|
return ts.iloc[:, -1]
|
|
70
72
|
|
|
73
|
+
def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
74
|
+
if self.offset_size > 0:
|
|
75
|
+
return ts.iloc[:, :-1].merge(
|
|
76
|
+
ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
|
|
77
|
+
left_index=True,
|
|
78
|
+
right_index=True,
|
|
79
|
+
)
|
|
80
|
+
return ts
|
|
81
|
+
|
|
71
82
|
@abc.abstractmethod
|
|
72
83
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
73
84
|
pass
|
|
@@ -93,12 +104,35 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
|
93
104
|
)
|
|
94
105
|
|
|
95
106
|
def to_formula(self) -> str:
|
|
96
|
-
|
|
107
|
+
roll_component = f"roll_{self.window_size}{self.window_unit}"
|
|
108
|
+
if self.offset_size > 0:
|
|
109
|
+
roll_component += f"_offset_{self.offset_size}{self.offset_unit}"
|
|
110
|
+
return f"{roll_component}_{self.aggregation}"
|
|
97
111
|
|
|
98
112
|
@classmethod
|
|
99
113
|
def from_formula(cls, formula: str) -> Optional["Roll"]:
|
|
100
114
|
import re
|
|
101
115
|
|
|
116
|
+
# Try matching pattern with offset first
|
|
117
|
+
pattern_with_offset = r"^roll_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])_(\w+)$"
|
|
118
|
+
match_with_offset = re.match(pattern_with_offset, formula)
|
|
119
|
+
|
|
120
|
+
if match_with_offset:
|
|
121
|
+
window_size = int(match_with_offset.group(1))
|
|
122
|
+
window_unit = match_with_offset.group(2)
|
|
123
|
+
offset_size = int(match_with_offset.group(3))
|
|
124
|
+
offset_unit = match_with_offset.group(4)
|
|
125
|
+
aggregation = match_with_offset.group(5)
|
|
126
|
+
|
|
127
|
+
return cls(
|
|
128
|
+
window_size=window_size,
|
|
129
|
+
window_unit=window_unit,
|
|
130
|
+
offset_size=offset_size,
|
|
131
|
+
offset_unit=offset_unit,
|
|
132
|
+
aggregation=aggregation,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# If no offset pattern found, try basic pattern
|
|
102
136
|
pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
|
|
103
137
|
match = re.match(pattern, formula)
|
|
104
138
|
|
|
@@ -133,12 +167,33 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
|
|
|
133
167
|
lag_unit: str = "D"
|
|
134
168
|
|
|
135
169
|
def to_formula(self) -> str:
|
|
136
|
-
|
|
170
|
+
lag_component = f"lag_{self.lag_size}{self.lag_unit}"
|
|
171
|
+
if self.offset_size > 0:
|
|
172
|
+
lag_component += f"_offset_{self.offset_size}{self.offset_unit}"
|
|
173
|
+
return lag_component
|
|
137
174
|
|
|
138
175
|
@classmethod
|
|
139
176
|
def from_formula(cls, formula: str) -> Optional["Lag"]:
|
|
140
177
|
import re
|
|
141
178
|
|
|
179
|
+
# Try matching pattern with offset first
|
|
180
|
+
pattern_with_offset = r"^lag_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])$"
|
|
181
|
+
match_with_offset = re.match(pattern_with_offset, formula)
|
|
182
|
+
|
|
183
|
+
if match_with_offset:
|
|
184
|
+
lag_size = int(match_with_offset.group(1))
|
|
185
|
+
lag_unit = match_with_offset.group(2)
|
|
186
|
+
offset_size = int(match_with_offset.group(3))
|
|
187
|
+
offset_unit = match_with_offset.group(4)
|
|
188
|
+
|
|
189
|
+
return cls(
|
|
190
|
+
lag_size=lag_size,
|
|
191
|
+
lag_unit=lag_unit,
|
|
192
|
+
offset_size=offset_size,
|
|
193
|
+
offset_unit=offset_unit,
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# If no offset pattern found, try basic pattern
|
|
142
197
|
pattern = r"^lag_(\d+)([a-zA-Z])$"
|
|
143
198
|
match = re.match(pattern, formula)
|
|
144
199
|
|
|
@@ -152,6 +207,12 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
|
|
|
152
207
|
|
|
153
208
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
154
209
|
res = super().get_params()
|
|
210
|
+
res.update(
|
|
211
|
+
{
|
|
212
|
+
"lag_size": self.lag_size,
|
|
213
|
+
"lag_unit": self.lag_unit,
|
|
214
|
+
}
|
|
215
|
+
)
|
|
155
216
|
return res
|
|
156
217
|
|
|
157
218
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.56a3818.dev1
|
|
3
|
+
Version: 1.2.56a3818.dev2
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=B35mYNQAFpDko1Bk1FrsuvEXXmEaDk9hG_5GrMTV4IA,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
|
|
@@ -21,7 +21,7 @@ upgini/autofe/feature.py,sha256=WFob1r-E5s1ano_ogzUZ9xnMjTBN26wGv9lcOX8XghI,1476
|
|
|
21
21
|
upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
|
|
22
22
|
upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
|
|
23
23
|
upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
|
|
24
|
-
upgini/autofe/vector.py,sha256=
|
|
24
|
+
upgini/autofe/vector.py,sha256=udkg4pP7IIeLjt0Cg6rzEKUmGaubOnqsEz3bz9R6E44,7110
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.56a3818.
|
|
63
|
-
upgini-1.2.56a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
-
upgini-1.2.56a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
-
upgini-1.2.56a3818.dev1.dist-info/RECORD,,
|
|
62
|
+
upgini-1.2.56a3818.dev2.dist-info/METADATA,sha256=V0b7BNTjV7HPOrPO34dKgjBeqORu5DkE62JZN-ub6gQ,49065
|
|
63
|
+
upgini-1.2.56a3818.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.56a3818.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.56a3818.dev2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|