upgini 1.2.74__py3-none-any.whl → 1.2.74a3818.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.74"
1
+ __version__ = "1.2.74a3818.dev1"
upgini/autofe/binary.py CHANGED
@@ -146,8 +146,8 @@ class Distance(PandasOperator):
146
146
 
147
147
  # row-wise dot product, handling None values
148
148
  def __dot(self, left: pd.Series, right: pd.Series) -> pd.Series:
149
- left = left.apply(lambda x: np.array(x))
150
- right = right.apply(lambda x: np.array(x))
149
+ left = left.apply(lambda x: np.array(x).astype(np.float64))
150
+ right = right.apply(lambda x: np.array(x).astype(np.float64))
151
151
  res = (left.dropna() * right.dropna()).apply(np.sum)
152
152
  res = res.reindex(left.index.union(right.index))
153
153
  return res
upgini/autofe/date.py CHANGED
@@ -187,21 +187,16 @@ class DateListDiff(PandasOperator, DateDiffMixin, ParametrizedOperator):
187
187
  class DateListDiffBounded(DateListDiff, ParametrizedOperator):
188
188
  lower_bound: Optional[int] = None
189
189
  upper_bound: Optional[int] = None
190
- normalize: Optional[bool] = None
191
190
 
192
191
  def to_formula(self) -> str:
193
192
  lower_bound = "minusinf" if self.lower_bound is None else self.lower_bound
194
193
  upper_bound = "plusinf" if self.upper_bound is None else self.upper_bound
195
- norm = "_norm" if self.normalize else ""
196
- return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}{norm}"
194
+ return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}"
197
195
 
198
196
  @classmethod
199
197
  def from_formula(cls, formula: str) -> Optional["DateListDiffBounded"]:
200
198
  import re
201
199
 
202
- normalize = formula.endswith("_norm")
203
- formula = formula.replace("_norm", "")
204
-
205
200
  pattern = r"^date_diff_([^_]+)_((minusinf|\d+))_((plusinf|\d+))_(\w+)$"
206
201
  match = re.match(pattern, formula)
207
202
 
@@ -212,13 +207,8 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
212
207
  lower_bound = None if match.group(2) == "minusinf" else int(match.group(2))
213
208
  upper_bound = None if match.group(4) == "plusinf" else int(match.group(4))
214
209
  aggregation = match.group(6)
215
- return cls(
216
- diff_unit=diff_unit,
217
- lower_bound=lower_bound,
218
- upper_bound=upper_bound,
219
- aggregation=aggregation,
220
- normalize=normalize,
221
- )
210
+
211
+ return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
222
212
 
223
213
  def get_params(self) -> Dict[str, Optional[str]]:
224
214
  res = super().get_params()
@@ -226,20 +216,14 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
226
216
  res["lower_bound"] = str(self.lower_bound)
227
217
  if self.upper_bound is not None:
228
218
  res["upper_bound"] = str(self.upper_bound)
229
- if self.normalize is not None:
230
- res["normalize"] = str(self.normalize)
231
219
  return res
232
220
 
233
221
  def _agg(self, x):
234
- orig_len = len(x)
235
222
  x = x[
236
223
  (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
237
224
  & (x < (self.upper_bound if self.upper_bound is not None else np.inf))
238
225
  ]
239
- agg_res = super()._agg(x)
240
- if self.normalize and orig_len > 0:
241
- return agg_res / orig_len
242
- return agg_res
226
+ return super()._agg(x)
243
227
 
244
228
 
245
229
  class DatePercentileBase(PandasOperator, abc.ABC):
upgini/autofe/feature.py CHANGED
@@ -154,34 +154,24 @@ class Feature:
154
154
  for child in self.children:
155
155
  child.delete_data()
156
156
 
157
- def get_op_display_name(self, use_alias: bool = True) -> str:
158
- return (self.op.alias or self.op.to_formula()).lower() if use_alias else self.op.to_formula()
157
+ def get_op_display_name(self) -> str:
158
+ return (self.op.alias or self.op.to_formula()).lower()
159
159
 
160
- def get_display_name(self, cache: bool = True, shorten: bool = False, use_op_alias: bool = True, **kwargs) -> str:
160
+ def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
161
161
  if self.cached_display_name is not None and cache:
162
162
  return self.cached_display_name
163
163
 
164
164
  should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
165
- components = []
165
+ prev_name = [self.children[0].get_op_display_name()] if should_stack_op else []
166
166
 
167
167
  if self.alias:
168
- components.extend(["f_autofe", self.alias])
169
- elif should_stack_op:
170
- components.extend(
171
- [
172
- self.children[0].get_display_name(
173
- cache=cache, shorten=shorten, use_op_alias=use_op_alias, **kwargs
174
- ),
175
- self.get_op_display_name(use_alias=use_op_alias),
176
- ]
177
- )
178
- elif shorten and not self.op.is_unary:
179
- components.extend(["f_autofe", self.get_op_display_name(use_alias=use_op_alias)])
168
+ components = ["f_autofe", self.alias]
169
+ elif shorten and (not self.op.is_unary or should_stack_op):
170
+ components = ["f_autofe"] + prev_name + [self.get_op_display_name()]
180
171
  else:
181
- components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
182
- "autofe",
183
- self.get_op_display_name(use_alias=use_op_alias),
184
- ]
172
+ components = (
173
+ ["f_" + "_f_".join(self.get_columns(**kwargs))] + ["autofe"] + prev_name + [self.get_op_display_name()]
174
+ )
185
175
  components.extend([str(self.display_index)] if self.display_index is not None else [])
186
176
  display_name = "_".join(components)
187
177
 
upgini/autofe/unary.py CHANGED
@@ -1,10 +1,8 @@
1
- import json
2
- from typing import Dict, List, Optional
1
+ from typing import Dict, Optional
3
2
  import numpy as np
4
3
  import pandas as pd
5
4
 
6
5
  from upgini.autofe.operator import PandasOperator, VectorizableMixin
7
- from upgini.autofe.utils import pydantic_validator
8
6
 
9
7
 
10
8
  class Abs(PandasOperator, VectorizableMixin):
@@ -155,38 +153,3 @@ class Embeddings(PandasOperator):
155
153
  is_unary: bool = True
156
154
  input_type: Optional[str] = "string"
157
155
  output_type: Optional[str] = "vector"
158
-
159
-
160
- class Bin(PandasOperator):
161
- name: str = "bin"
162
- is_unary: bool = True
163
- output_type: Optional[str] = "category"
164
- bin_bounds: List[int] = []
165
- is_categorical: bool = True
166
-
167
- def calculate_unary(self, data: pd.Series) -> pd.Series:
168
- return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype("category")
169
-
170
- def _bin(self, f, bounds):
171
- if f is None or np.isnan(f):
172
- return np.nan
173
- hit = np.where(f >= np.array(bounds))[0]
174
- if hit.size > 0:
175
- return np.max(hit) + 1
176
- else:
177
- return np.nan
178
-
179
- def get_params(self) -> Dict[str, Optional[str]]:
180
- res = super().get_params()
181
- res.update(
182
- {
183
- "bin_bounds": json.dumps(self.bin_bounds),
184
- }
185
- )
186
- return res
187
-
188
- @pydantic_validator("bin_bounds", mode="before")
189
- def parse_bin_bounds(cls, value):
190
- if isinstance(value, str):
191
- return json.loads(value)
192
- return value
upgini/autofe/vector.py CHANGED
@@ -22,12 +22,3 @@ class Sum(PandasOperator, VectorizableMixin):
22
22
 
23
23
  def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
24
  return pd.DataFrame(data).T.fillna(0).sum(axis=1)
25
-
26
-
27
- class Vectorize(PandasOperator, VectorizableMixin):
28
- name: str = "vectorize"
29
- is_vector: bool = True
30
- group_index: int = 0
31
-
32
- def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
33
- return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
upgini/metrics.py CHANGED
@@ -326,7 +326,7 @@ class EstimatorWrapper:
326
326
  for c in x.columns:
327
327
  if is_numeric_dtype(x[c]):
328
328
  x[c] = x[c].astype(float)
329
- elif not x[c].dtype == "category":
329
+ else:
330
330
  x[c] = x[c].astype(str)
331
331
 
332
332
  if not isinstance(y, pd.Series):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.74
3
+ Version: 1.2.74a3818.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=LYhBOE9Gp2v3W8-OC57ijtt8O3FDg9mFfSW0EIsbsG4,23
1
+ upgini/__about__.py,sha256=OJAwNDxLBKa-uno0o9LM57LzTEqZIlSuf5VB46GJths,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
@@ -6,7 +6,7 @@ upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
6
  upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
7
7
  upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
8
8
  upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=7UhKUwO7bskcu9vgTIQoazE2qrOMhRuFyc6IOu1AJ3Y,39053
9
+ upgini/metrics.py,sha256=UNNA3H7wWATq-lTb9BChDdFc14MOYH9FTWY2Te4OU2o,39024
10
10
  upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -14,14 +14,14 @@ upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9Jvf
14
14
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
17
- upgini/autofe/binary.py,sha256=MnQuFiERpocjCPQUjOljlsq5FE-04GPfwtNjzvfNMyU,7671
18
- upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
19
- upgini/autofe/feature.py,sha256=G_YgnsauIoaMgByx9JXDPiKc4nqs0pwWZUfvoIGMKxY,15305
17
+ upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
18
+ upgini/autofe/date.py,sha256=C86F7sPiscUGq2a45UtQA9ADWBWg0kt54mePHHzjbLE,10633
19
+ upgini/autofe/feature.py,sha256=y1x3wijhTVBmloayQAHiscqKU9Ll8kLcGm1PdvS357I,14910
20
20
  upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
21
21
  upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
22
- upgini/autofe/unary.py,sha256=Sx11IoHRh5nwyALzjgG9GQOrVNIs8NZ1JzunAJuN66A,5731
22
+ upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
23
23
  upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
24
- upgini/autofe/vector.py,sha256=zehv1J9ChHdZKWjKlkRf6RpfQMCJduZmqCEePYNUfkQ,943
24
+ upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
25
25
  upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
26
26
  upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
27
27
  upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.74.dist-info/METADATA,sha256=NK_V10Btl-ASVNrgJZQKOukyMNxod7dz2q_ZwPywTbQ,49091
74
- upgini-1.2.74.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
- upgini-1.2.74.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.74.dist-info/RECORD,,
73
+ upgini-1.2.74a3818.dev1.dist-info/METADATA,sha256=J-UotQ8AHXGCacs9yJb2tQ0s_1Tm89spF2wZgUyysL4,49101
74
+ upgini-1.2.74a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.74a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.74a3818.dev1.dist-info/RECORD,,