upgini 1.2.56a3818.dev1__py3-none-any.whl → 1.2.56a3818.dev3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.56a3818.dev1"
1
+ __version__ = "1.2.56a3818.dev3"
upgini/autofe/feature.py CHANGED
@@ -29,6 +29,15 @@ class Column:
29
29
  self.name = self._unhash(mapping.get(self.name) or self.name)
30
30
  return self
31
31
 
32
+ def _unhash(self, feature_name: str) -> str:
33
+ last_component_idx = feature_name.rfind("_")
34
+ if not feature_name.startswith("f_"):
35
+ return feature_name # etalon feature
36
+ elif last_component_idx == 1:
37
+ return feature_name[2:] # fully hashed name, cannot unhash
38
+ else:
39
+ return feature_name[2:last_component_idx]
40
+
32
41
  def delete_data(self):
33
42
  self.data = None
34
43
 
upgini/autofe/vector.py CHANGED
@@ -54,20 +54,31 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
54
54
  ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
55
55
  ts.set_index(date.name, inplace=True)
56
56
  ts = ts[ts.index.notna()].sort_index()
57
- if self.offset_size > 0:
58
- ts = ts.iloc[:, :-1].merge(
59
- ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
60
- left_index=True,
61
- right_index=True,
62
- how="left",
63
- )
64
- ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
57
+ ts = (
58
+ ts.groupby([c.name for c in data[1:-1]])
59
+ .apply(self._shift)[data[-1].name]
60
+ .to_frame()
61
+ .reset_index()
62
+ .set_index(date.name)
63
+ .groupby([c.name for c in data[1:-1]])
64
+ if len(data) > 2
65
+ else self._shift(ts)
66
+ )
65
67
  ts = self._aggregate(ts)
66
68
  ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
67
69
  ts.index = date.index
68
70
 
69
71
  return ts.iloc[:, -1]
70
72
 
73
+ def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
74
+ if self.offset_size > 0:
75
+ return ts.iloc[:, :-1].merge(
76
+ ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
77
+ left_index=True,
78
+ right_index=True,
79
+ )
80
+ return ts
81
+
71
82
  @abc.abstractmethod
72
83
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
73
84
  pass
@@ -93,12 +104,35 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
93
104
  )
94
105
 
95
106
  def to_formula(self) -> str:
96
- return f"roll_{self.window_size}{self.window_unit}_{self.aggregation}"
107
+ roll_component = f"roll_{self.window_size}{self.window_unit}"
108
+ if self.offset_size > 0:
109
+ roll_component += f"_offset_{self.offset_size}{self.offset_unit}"
110
+ return f"{roll_component}_{self.aggregation}"
97
111
 
98
112
  @classmethod
99
113
  def from_formula(cls, formula: str) -> Optional["Roll"]:
100
114
  import re
101
115
 
116
+ # Try matching pattern with offset first
117
+ pattern_with_offset = r"^roll_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])_(\w+)$"
118
+ match_with_offset = re.match(pattern_with_offset, formula)
119
+
120
+ if match_with_offset:
121
+ window_size = int(match_with_offset.group(1))
122
+ window_unit = match_with_offset.group(2)
123
+ offset_size = int(match_with_offset.group(3))
124
+ offset_unit = match_with_offset.group(4)
125
+ aggregation = match_with_offset.group(5)
126
+
127
+ return cls(
128
+ window_size=window_size,
129
+ window_unit=window_unit,
130
+ offset_size=offset_size,
131
+ offset_unit=offset_unit,
132
+ aggregation=aggregation,
133
+ )
134
+
135
+ # If no offset pattern found, try basic pattern
102
136
  pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
103
137
  match = re.match(pattern, formula)
104
138
 
@@ -133,12 +167,33 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
133
167
  lag_unit: str = "D"
134
168
 
135
169
  def to_formula(self) -> str:
136
- return f"lag_{self.lag_size}{self.lag_unit}"
170
+ lag_component = f"lag_{self.lag_size}{self.lag_unit}"
171
+ if self.offset_size > 0:
172
+ lag_component += f"_offset_{self.offset_size}{self.offset_unit}"
173
+ return lag_component
137
174
 
138
175
  @classmethod
139
176
  def from_formula(cls, formula: str) -> Optional["Lag"]:
140
177
  import re
141
178
 
179
+ # Try matching pattern with offset first
180
+ pattern_with_offset = r"^lag_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])$"
181
+ match_with_offset = re.match(pattern_with_offset, formula)
182
+
183
+ if match_with_offset:
184
+ lag_size = int(match_with_offset.group(1))
185
+ lag_unit = match_with_offset.group(2)
186
+ offset_size = int(match_with_offset.group(3))
187
+ offset_unit = match_with_offset.group(4)
188
+
189
+ return cls(
190
+ lag_size=lag_size,
191
+ lag_unit=lag_unit,
192
+ offset_size=offset_size,
193
+ offset_unit=offset_unit,
194
+ )
195
+
196
+ # If no offset pattern found, try basic pattern
142
197
  pattern = r"^lag_(\d+)([a-zA-Z])$"
143
198
  match = re.match(pattern, formula)
144
199
 
@@ -152,6 +207,12 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
152
207
 
153
208
  def get_params(self) -> Dict[str, Optional[str]]:
154
209
  res = super().get_params()
210
+ res.update(
211
+ {
212
+ "lag_size": self.lag_size,
213
+ "lag_unit": self.lag_unit,
214
+ }
215
+ )
155
216
  return res
156
217
 
157
218
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.56a3818.dev1
3
+ Version: 1.2.56a3818.dev3
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=9Q-YAgHCpn_n0_PUmuZMMX9GDhEwIxeK81U6HvLvWLA,33
1
+ upgini/__about__.py,sha256=XfQddTt7Eukn9oKzHOurK-q9Fsg4ob2XoIvu2NhKfS0,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
@@ -17,11 +17,11 @@ upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  upgini/autofe/all_operands.py,sha256=v0_NozalvvzeojSAA0d7UJ5INS654ZVaLn4S8djK6Ac,329
18
18
  upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
19
19
  upgini/autofe/date.py,sha256=d-sijAD7dETfqIOCaZh1vhuVjsS_nqa-6dhjwkCdny4,10441
20
- upgini/autofe/feature.py,sha256=WFob1r-E5s1ano_ogzUZ9xnMjTBN26wGv9lcOX8XghI,14763
20
+ upgini/autofe/feature.py,sha256=l8A8E3BH2BmYvqEC81zbcIEfH6KEEhcesJ2BH4fn0-4,15140
21
21
  upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
22
22
  upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
23
23
  upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
24
- upgini/autofe/vector.py,sha256=lndD4YOY0pO2va6X-2akITS2VD3pZ1dFpwiAEDMtBOc,4872
24
+ upgini/autofe/vector.py,sha256=udkg4pP7IIeLjt0Cg6rzEKUmGaubOnqsEz3bz9R6E44,7110
25
25
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
27
27
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.56a3818.dev1.dist-info/METADATA,sha256=R_ooFZF6RCW80rkva4Divg2KGhYXmSo1J6kKrNVzUWg,49065
63
- upgini-1.2.56a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
- upgini-1.2.56a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.56a3818.dev1.dist-info/RECORD,,
62
+ upgini-1.2.56a3818.dev3.dist-info/METADATA,sha256=wNOPrGCBYPZvYeVT6RQql1c7MDIE6ls83ZOtHoSFsIc,49065
63
+ upgini-1.2.56a3818.dev3.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
+ upgini-1.2.56a3818.dev3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.56a3818.dev3.dist-info/RECORD,,