upgini 1.2.56a3675.dev1__py3-none-any.whl → 1.2.56a3818.dev1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.56a3675.dev1"
1
+ __version__ = "1.2.56a3818.dev1"
upgini/autofe/date.py CHANGED
@@ -64,9 +64,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
64
64
  return res
65
65
 
66
66
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
67
- if left.isna().all() or right.isna().all():
68
- return pd.Series([None] * len(left))
69
-
70
67
  left = self._convert_to_date(left, self.left_unit)
71
68
  right = self._convert_to_date(right, self.right_unit)
72
69
  diff = self._convert_diff_to_unit(left.dt.date - right.dt.date)
@@ -145,9 +142,6 @@ class DateListDiff(PandasOperand, DateDiffMixin, ParametrizedOperand):
145
142
  return cls(aggregation=aggregation)
146
143
 
147
144
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
148
- if left.isna().all() or right.isna().all():
149
- return pd.Series([None] * len(left), dtype=np.float64)
150
-
151
145
  left = self._convert_to_date(left, self.left_unit)
152
146
  right_mask = right.apply(lambda x: len(x) > 0)
153
147
  mask = left.notna() & right.notna() & right_mask
@@ -236,8 +230,6 @@ class DatePercentileBase(PandasOperand, abc.ABC):
236
230
  pass
237
231
 
238
232
  def _perc(self, f, bounds):
239
- if f is None or np.isnan(f):
240
- return np.nan
241
233
  hit = np.where(f >= np.array(bounds))[0]
242
234
  if hit.size > 0:
243
235
  return np.max(hit) + 1
upgini/autofe/feature.py CHANGED
@@ -26,18 +26,9 @@ class Column:
26
26
  return dict()
27
27
 
28
28
  def rename_columns(self, mapping: Dict[str, str]) -> "Column":
29
- self.name = mapping.get(self.name) or self.name
29
+ self.name = self._unhash(mapping.get(self.name) or self.name)
30
30
  return self
31
31
 
32
- def _unhash(self, feature_name: str) -> str:
33
- last_component_idx = feature_name.rfind("_")
34
- if not feature_name.startswith("f_"):
35
- return feature_name # etalon feature
36
- elif last_component_idx == 1:
37
- return feature_name[2:] # fully hashed name, cannot unhash
38
- else:
39
- return feature_name[2:last_component_idx]
40
-
41
32
  def delete_data(self):
42
33
  self.data = None
43
34
 
upgini/autofe/vector.py CHANGED
@@ -2,7 +2,11 @@ import abc
2
2
  from typing import Dict, List, Optional
3
3
 
4
4
  import pandas as pd
5
- from pydantic import validator
5
+
6
+ try:
7
+ from pydantic import field_validator as validator # V2
8
+ except ImportError:
9
+ from pydantic import validator # V1
6
10
 
7
11
  from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
8
12
 
@@ -29,12 +33,16 @@ class Sum(PandasOperand, VectorizableMixin):
29
33
  class TimeSeriesBase(PandasOperand, abc.ABC):
30
34
  is_vector: bool = True
31
35
  date_unit: Optional[str] = None
36
+ offset_size: int = 0
37
+ offset_unit: str = "D"
32
38
 
33
39
  def get_params(self) -> Dict[str, Optional[str]]:
34
40
  res = super().get_params()
35
41
  res.update(
36
42
  {
37
43
  "date_unit": self.date_unit,
44
+ "offset_size": self.offset_size,
45
+ "offset_unit": self.offset_unit,
38
46
  }
39
47
  )
40
48
  return res
@@ -46,6 +54,13 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
46
54
  ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
47
55
  ts.set_index(date.name, inplace=True)
48
56
  ts = ts[ts.index.notna()].sort_index()
57
+ if self.offset_size > 0:
58
+ ts = ts.iloc[:, :-1].merge(
59
+ ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
60
+ left_index=True,
61
+ right_index=True,
62
+ how="left",
63
+ )
49
64
  ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
50
65
  ts = self._aggregate(ts)
51
66
  ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
@@ -67,6 +82,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
67
82
  window_unit: str = "D"
68
83
 
69
84
  @validator("window_unit")
85
+ @classmethod
70
86
  def validate_window_unit(cls, v: str) -> str:
71
87
  try:
72
88
  pd.tseries.frequencies.to_offset(v)
@@ -107,7 +123,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
107
123
  return res
108
124
 
109
125
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
110
- return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=self.window_size).agg(
126
+ return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=1).agg(
111
127
  _roll_aggregations.get(self.aggregation, self.aggregation)
112
128
  )
113
129
 
@@ -3733,7 +3733,7 @@ if response.status_code == 200:
3733
3733
  features_info_without_links = []
3734
3734
  internal_features_info = []
3735
3735
 
3736
- original_shaps = {fm.name: fm.shap_value for fm in features_meta}
3736
+ original_shaps = {original_names_dict.get(fm.name, fm.name): fm.shap_value for fm in features_meta}
3737
3737
 
3738
3738
  if updated_shaps is not None:
3739
3739
  for fm in features_meta:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.56a3675.dev1
3
+ Version: 1.2.56a3818.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=zutpbvOkIMghNnMan4pM_KmtKU8O_-m4L2jZD3YAlDU,33
1
+ upgini/__about__.py,sha256=9Q-YAgHCpn_n0_PUmuZMMX9GDhEwIxeK81U6HvLvWLA,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=Te4ZbFZ2RCEi9NHo1ddWaxfkTep_3O6Okct3U_DWeD0,201520
6
+ upgini/features_enricher.py,sha256=FkAKQV_XOXTobwOXpdy9BPfRkL4fkgoNa2B6NniiCrs,201554
7
7
  upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
@@ -16,12 +16,12 @@ upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  upgini/autofe/all_operands.py,sha256=v0_NozalvvzeojSAA0d7UJ5INS654ZVaLn4S8djK6Ac,329
18
18
  upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
19
- upgini/autofe/date.py,sha256=oykxfmny4LOr6m79IipOUCtk2JQSUdSCWHh8K9n7nek,10726
20
- upgini/autofe/feature.py,sha256=fGZcnn6X8jJwINz_JQ-HiRvT6o2f23rORXHavdJIIN8,15126
19
+ upgini/autofe/date.py,sha256=d-sijAD7dETfqIOCaZh1vhuVjsS_nqa-6dhjwkCdny4,10441
20
+ upgini/autofe/feature.py,sha256=WFob1r-E5s1ano_ogzUZ9xnMjTBN26wGv9lcOX8XghI,14763
21
21
  upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
22
22
  upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
23
23
  upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
24
- upgini/autofe/vector.py,sha256=bvcop9b0uFFPfQ3FLTwXT2IYfxNl4dIfR8icvnBHvOA,4358
24
+ upgini/autofe/vector.py,sha256=lndD4YOY0pO2va6X-2akITS2VD3pZ1dFpwiAEDMtBOc,4872
25
25
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
27
27
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.56a3675.dev1.dist-info/METADATA,sha256=dzP80HeBsaFEEmIjuVeC7HWPO4UWTcEwiLqmPOE17-8,49065
63
- upgini-1.2.56a3675.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
- upgini-1.2.56a3675.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.56a3675.dev1.dist-info/RECORD,,
62
+ upgini-1.2.56a3818.dev1.dist-info/METADATA,sha256=R_ooFZF6RCW80rkva4Divg2KGhYXmSo1J6kKrNVzUWg,49065
63
+ upgini-1.2.56a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
64
+ upgini-1.2.56a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.56a3818.dev1.dist-info/RECORD,,