upgini 1.1.296a3521.dev10__py3-none-any.whl → 1.1.297__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.296a3521.dev10"
1
+ __version__ = "1.1.297"
@@ -1,14 +1,7 @@
1
1
  from typing import Dict
2
2
 
3
3
  from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
4
- from upgini.autofe.date import (
5
- DateDiff,
6
- DateDiffType2,
7
- DateListDiff,
8
- DateListDiffBounded,
9
- DatePercentile,
10
- DatePercentileMethod2,
11
- )
4
+ from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
12
5
  from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
13
6
  from upgini.autofe.operand import Operand
14
7
  from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Norm, Sigmoid, Sqrt, Square
@@ -57,7 +50,6 @@ ALL_OPERANDS: Dict[str, Operand] = {
57
50
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
58
51
  DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
59
52
  DatePercentile(),
60
- DatePercentileMethod2(),
61
53
  Norm(),
62
54
  ]
63
55
  }
upgini/autofe/date.py CHANGED
@@ -1,4 +1,3 @@
1
- import abc
2
1
  from typing import Any, Dict, List, Optional, Union
3
2
 
4
3
  import numpy as np
@@ -39,7 +38,6 @@ class DateDiffMixin(BaseModel):
39
38
 
40
39
  class DateDiff(PandasOperand, DateDiffMixin):
41
40
  name = "date_diff"
42
- alias = "date_diff_type1"
43
41
  is_binary = True
44
42
  has_symmetry_importance = True
45
43
 
@@ -161,45 +159,12 @@ class DateListDiffBounded(DateListDiff):
161
159
  return super()._agg(x)
162
160
 
163
161
 
164
- class DatePercentileBase(PandasOperand, abc.ABC):
162
+ class DatePercentile(PandasOperand):
163
+ name = "date_per"
165
164
  is_binary = True
166
165
  output_type = "float"
167
166
 
168
167
  date_unit: Optional[str] = None
169
-
170
- def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
171
- # Assuming that left is a date column, right is a feature column
172
- left = pd.to_datetime(left, unit=self.date_unit)
173
-
174
- bounds = self._get_bounds(left)
175
-
176
- return right.index.to_series().apply(lambda i: self._perc(right[i], bounds[i]))
177
-
178
- @abc.abstractmethod
179
- def _get_bounds(self, date_col: pd.Series) -> pd.Series:
180
- pass
181
-
182
- def _perc(self, f, bounds):
183
- hit = np.where(f >= bounds)[0]
184
- if hit.size > 0:
185
- return np.max(hit) + 1
186
- else:
187
- return np.nan
188
-
189
- def get_params(self) -> Dict[str, Optional[str]]:
190
- res = super().get_params()
191
- res.update(
192
- {
193
- "date_unit": self.date_unit,
194
- }
195
- )
196
- return res
197
-
198
-
199
- class DatePercentile(DatePercentileBase):
200
- name = "date_per"
201
- alias = "date_per_method1"
202
-
203
168
  zero_month: Optional[int]
204
169
  zero_year: Optional[int]
205
170
  zero_bounds: Optional[List[float]]
@@ -209,6 +174,7 @@ class DatePercentile(DatePercentileBase):
209
174
  res = super().get_params()
210
175
  res.update(
211
176
  {
177
+ "date_unit": self.date_unit,
212
178
  "zero_month": self.zero_month,
213
179
  "zero_year": self.zero_year,
214
180
  "zero_bounds": self.zero_bounds,
@@ -224,18 +190,22 @@ class DatePercentile(DatePercentileBase):
224
190
  elif isinstance(value, str):
225
191
  return value[1:-1].split(", ")
226
192
 
227
- def _get_bounds(self, date_col: pd.Series) -> pd.Series:
228
- months = date_col.dt.month
229
- years = date_col.dt.year
193
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
194
+ # Assuming that left is a date column, right is a feature column
195
+ left = pd.to_datetime(left, unit=self.date_unit)
196
+ months = left.dt.month
197
+ years = left.dt.year
230
198
 
231
199
  month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
232
- return month_diffs.apply(
200
+ bounds = month_diffs.apply(
233
201
  lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * self.step
234
202
  )
235
203
 
204
+ return right.index.to_series().apply(lambda i: self.__perc(right[i], bounds[i]))
236
205
 
237
- class DatePercentileMethod2(DatePercentileBase):
238
- name = "date_per_method2"
239
-
240
- def _get_bounds(self, date_col: pd.Series) -> pd.Series:
241
- pass
206
+ def __perc(self, f, bounds):
207
+ hit = np.where(f >= bounds)[0]
208
+ if hit.size > 0:
209
+ return np.max(hit) + 1
210
+ else:
211
+ return np.nan
upgini/autofe/feature.py CHANGED
@@ -16,9 +16,6 @@ class Column:
16
16
  self.data = data
17
17
  self.calculate_all = calculate_all
18
18
 
19
- def get_name_component(self, **kwargs) -> str:
20
- return self.name
21
-
22
19
  def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
23
20
  return self.name
24
21
 
@@ -44,7 +41,7 @@ class Column:
44
41
  def get_column_nodes(self) -> List["Column"]:
45
42
  return [self]
46
43
 
47
- def get_columns(self, **kwargs) -> List[str]:
44
+ def get_columns(self) -> List[str]:
48
45
  return [self.name]
49
46
 
50
47
  def infer_type(self, data: pd.DataFrame) -> DtypeObj:
@@ -60,12 +57,6 @@ class Column:
60
57
  def to_pretty_formula(self) -> str:
61
58
  return self.to_formula()
62
59
 
63
- def __eq__(self, value: object) -> bool:
64
- if not isinstance(value, Column):
65
- return False
66
- else:
67
- return self.name == value.name and self.calculate_all == value.calculate_all
68
-
69
60
 
70
61
  class Feature:
71
62
  def __init__(
@@ -134,9 +125,6 @@ class Feature:
134
125
  for child in self.children:
135
126
  child.delete_data()
136
127
 
137
- def get_op_display_name(self) -> str:
138
- return self.op.alias or self.op.name.lower()
139
-
140
128
  def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
141
129
  if self.cached_display_name is not None and cache:
142
130
  return self.cached_display_name
@@ -144,11 +132,11 @@ class Feature:
144
132
  if self.alias:
145
133
  components = ["f_autofe", self.alias]
146
134
  elif shorten and not self.op.is_unary:
147
- components = ["f_autofe", self.get_op_display_name()]
135
+ components = ["f_autofe", self.op.alias or self.op.name.lower()]
148
136
  else:
149
137
  components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
150
138
  "autofe",
151
- self.get_op_display_name(),
139
+ self.op.alias or self.op.name.lower(),
152
140
  ]
153
141
  components.extend([str(self.display_index)] if self.display_index is not None else [])
154
142
  display_name = "_".join(components)
@@ -318,21 +306,8 @@ class FeatureGroup:
318
306
  main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
319
307
  if isinstance(self.op, PandasOperand):
320
308
  columns = self.get_columns()
321
- lower_order_children = [
322
- ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
323
- ]
324
- lower_order_names = [ch.get_display_name() for ch in lower_order_children]
325
- if any(isinstance(f, Feature) for f in lower_order_children):
326
- child_data = pd.concat(
327
- [data[main_column]] + [ch.calculate(data) for ch in lower_order_children],
328
- axis=1,
329
- )
330
- child_data.columns = [main_column] + lower_order_names
331
- else:
332
- child_data = data[columns]
333
-
334
- new_data = self.op.calculate_group(child_data, main_column=main_column)
335
- new_data.rename(columns=dict(zip(lower_order_names, self.get_display_names())), inplace=True)
309
+ new_data = self.op.calculate_group(data[columns], main_column=main_column)
310
+ new_data.rename(columns=dict(zip(columns, self.get_display_names())), inplace=True)
336
311
  else:
337
312
  raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
338
313
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.296a3521.dev10
3
+ Version: 1.1.297
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=JKvS85WNogY-2YIe8YIQ4nIWCAaaPwBgVr2LGrQzI7g,35
1
+ upgini/__about__.py,sha256=wXLUjYbwCXLCqMCPB8OqSFq3oPafpNboNnGSVzAPaUk,24
2
2
  upgini/__init__.py,sha256=ObEtjFkIssl83qeKNMLpIQygfwK8TzztwiI43YTsAP0,353
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
@@ -14,10 +14,10 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
14
14
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
15
15
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- upgini/autofe/all_operands.py,sha256=XbvgX2IU4aee9rJZ--d5MdmrfKhON_emle5-RU1qlEY,2506
17
+ upgini/autofe/all_operands.py,sha256=cpwUfhZWF9QBfrUyJ0xZ72iGYyt1eXIZQ46FB-7ZDI4,2421
18
18
  upgini/autofe/binary.py,sha256=8FXPJxN7fnC5wphO0Dp1tQCa0lFMSDGQGvBMkSIVAcE,4155
19
- upgini/autofe/date.py,sha256=8zYVhjl7jVS4xt-IjCgk9px2LHnACX2YlMlmDELlRTc,7943
20
- upgini/autofe/feature.py,sha256=nV1oJCT65nsAYFfXYCXYNYXZPaZVpqKJ09iCMTldooc,13500
19
+ upgini/autofe/date.py,sha256=qzk0NT332Q0vR1eRwTuNiMSrGE3ulh6Ic3QLBZqSdvw,7284
20
+ upgini/autofe/feature.py,sha256=_V9B74B3ue7eAYXSOt9JKhVC9klkAKks22MwnBRye_w,12487
21
21
  upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
22
22
  upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
23
23
  upgini/autofe/unary.py,sha256=ZWjLd-CUkNt_PpM8YuWLLipW1v_RdBlsl4JxXIVo9aM,3652
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.1.296a3521.dev10.dist-info/METADATA,sha256=RZTVD4L0dwwznJDxfpxjmCTZGzSQjE8lX1fw5CjP9ZA,48162
61
- upgini-1.1.296a3521.dev10.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
62
- upgini-1.1.296a3521.dev10.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.1.296a3521.dev10.dist-info/RECORD,,
60
+ upgini-1.1.297.dist-info/METADATA,sha256=RwOihmiasIoIOFrOuY-WcLPOc4Fgt5QlztJGwCg5QQ8,48151
61
+ upgini-1.1.297.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.1.297.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.1.297.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any