upgini 1.2.56a3675.dev2__tar.gz → 1.2.56a3675.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/PKG-INFO +1 -1
  2. upgini-1.2.56a3675.dev3/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/vector.py +82 -5
  4. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/features_enricher.py +1 -1
  5. upgini-1.2.56a3675.dev2/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/.gitignore +0 -0
  7. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/LICENSE +0 -0
  8. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/README.md +0 -0
  9. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/pyproject.toml +0 -0
  10. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/all_operands.py +0 -0
  16. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/operand.py +0 -0
  21. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/unary.py +0 -0
  22. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/data_source/__init__.py +0 -0
  23. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/data_source/data_source_publisher.py +0 -0
  24. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/dataset.py +0 -0
  25. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/errors.py +0 -0
  26. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/http.py +0 -0
  27. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/metadata.py +0 -0
  31. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/metrics.py +0 -0
  32. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/normalizer/normalize_utils.py +0 -0
  34. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/search_task.py +0 -0
  43. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/spinner.py +0 -0
  44. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  45. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/feature_info.py +0 -0
  57. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/features_validator.py +0 -0
  58. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.56a3675.dev2 → upgini-1.2.56a3675.dev3}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.56a3675.dev2
3
+ Version: 1.2.56a3675.dev3
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.56a3675.dev3"
@@ -2,7 +2,11 @@ import abc
2
2
  from typing import Dict, List, Optional
3
3
 
4
4
  import pandas as pd
5
- from pydantic import validator
5
+
6
+ try:
7
+ from pydantic import field_validator as validator # V2
8
+ except ImportError:
9
+ from pydantic import validator # V1
6
10
 
7
11
  from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
8
12
 
@@ -29,12 +33,16 @@ class Sum(PandasOperand, VectorizableMixin):
29
33
  class TimeSeriesBase(PandasOperand, abc.ABC):
30
34
  is_vector: bool = True
31
35
  date_unit: Optional[str] = None
36
+ offset_size: int = 0
37
+ offset_unit: str = "D"
32
38
 
33
39
  def get_params(self) -> Dict[str, Optional[str]]:
34
40
  res = super().get_params()
35
41
  res.update(
36
42
  {
37
43
  "date_unit": self.date_unit,
44
+ "offset_size": self.offset_size,
45
+ "offset_unit": self.offset_unit,
38
46
  }
39
47
  )
40
48
  return res
@@ -46,13 +54,31 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
46
54
  ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
47
55
  ts.set_index(date.name, inplace=True)
48
56
  ts = ts[ts.index.notna()].sort_index()
49
- ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
57
+ ts = (
58
+ ts.groupby([c.name for c in data[1:-1]])
59
+ .apply(self._shift)[data[-1].name]
60
+ .to_frame()
61
+ .reset_index()
62
+ .set_index(date.name)
63
+ .groupby([c.name for c in data[1:-1]])
64
+ if len(data) > 2
65
+ else self._shift(ts)
66
+ )
50
67
  ts = self._aggregate(ts)
51
68
  ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
52
69
  ts.index = date.index
53
70
 
54
71
  return ts.iloc[:, -1]
55
72
 
73
+ def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
74
+ if self.offset_size > 0:
75
+ return ts.iloc[:, :-1].merge(
76
+ ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
77
+ left_index=True,
78
+ right_index=True,
79
+ )
80
+ return ts
81
+
56
82
  @abc.abstractmethod
57
83
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
58
84
  pass
@@ -67,6 +93,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
67
93
  window_unit: str = "D"
68
94
 
69
95
  @validator("window_unit")
96
+ @classmethod
70
97
  def validate_window_unit(cls, v: str) -> str:
71
98
  try:
72
99
  pd.tseries.frequencies.to_offset(v)
@@ -77,12 +104,35 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
77
104
  )
78
105
 
79
106
  def to_formula(self) -> str:
80
- return f"roll_{self.window_size}{self.window_unit}_{self.aggregation}"
107
+ roll_component = f"roll_{self.window_size}{self.window_unit}"
108
+ if self.offset_size > 0:
109
+ roll_component += f"_offset_{self.offset_size}{self.offset_unit}"
110
+ return f"{roll_component}_{self.aggregation}"
81
111
 
82
112
  @classmethod
83
113
  def from_formula(cls, formula: str) -> Optional["Roll"]:
84
114
  import re
85
115
 
116
+ # Try matching pattern with offset first
117
+ pattern_with_offset = r"^roll_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])_(\w+)$"
118
+ match_with_offset = re.match(pattern_with_offset, formula)
119
+
120
+ if match_with_offset:
121
+ window_size = int(match_with_offset.group(1))
122
+ window_unit = match_with_offset.group(2)
123
+ offset_size = int(match_with_offset.group(3))
124
+ offset_unit = match_with_offset.group(4)
125
+ aggregation = match_with_offset.group(5)
126
+
127
+ return cls(
128
+ window_size=window_size,
129
+ window_unit=window_unit,
130
+ offset_size=offset_size,
131
+ offset_unit=offset_unit,
132
+ aggregation=aggregation,
133
+ )
134
+
135
+ # If no offset pattern found, try basic pattern
86
136
  pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
87
137
  match = re.match(pattern, formula)
88
138
 
@@ -107,7 +157,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
107
157
  return res
108
158
 
109
159
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
110
- return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=self.window_size).agg(
160
+ return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=1).agg(
111
161
  _roll_aggregations.get(self.aggregation, self.aggregation)
112
162
  )
113
163
 
@@ -117,12 +167,33 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
117
167
  lag_unit: str = "D"
118
168
 
119
169
  def to_formula(self) -> str:
120
- return f"lag_{self.lag_size}{self.lag_unit}"
170
+ lag_component = f"lag_{self.lag_size}{self.lag_unit}"
171
+ if self.offset_size > 0:
172
+ lag_component += f"_offset_{self.offset_size}{self.offset_unit}"
173
+ return lag_component
121
174
 
122
175
  @classmethod
123
176
  def from_formula(cls, formula: str) -> Optional["Lag"]:
124
177
  import re
125
178
 
179
+ # Try matching pattern with offset first
180
+ pattern_with_offset = r"^lag_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])$"
181
+ match_with_offset = re.match(pattern_with_offset, formula)
182
+
183
+ if match_with_offset:
184
+ lag_size = int(match_with_offset.group(1))
185
+ lag_unit = match_with_offset.group(2)
186
+ offset_size = int(match_with_offset.group(3))
187
+ offset_unit = match_with_offset.group(4)
188
+
189
+ return cls(
190
+ lag_size=lag_size,
191
+ lag_unit=lag_unit,
192
+ offset_size=offset_size,
193
+ offset_unit=offset_unit,
194
+ )
195
+
196
+ # If no offset pattern found, try basic pattern
126
197
  pattern = r"^lag_(\d+)([a-zA-Z])$"
127
198
  match = re.match(pattern, formula)
128
199
 
@@ -136,6 +207,12 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
136
207
 
137
208
  def get_params(self) -> Dict[str, Optional[str]]:
138
209
  res = super().get_params()
210
+ res.update(
211
+ {
212
+ "lag_size": self.lag_size,
213
+ "lag_unit": self.lag_unit,
214
+ }
215
+ )
139
216
  return res
140
217
 
141
218
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
@@ -3733,7 +3733,7 @@ if response.status_code == 200:
3733
3733
  features_info_without_links = []
3734
3734
  internal_features_info = []
3735
3735
 
3736
- original_shaps = {fm.name: fm.shap_value for fm in features_meta}
3736
+ original_shaps = {original_names_dict.get(fm.name, fm.name): fm.shap_value for fm in features_meta}
3737
3737
 
3738
3738
  if updated_shaps is not None:
3739
3739
  for fm in features_meta:
@@ -1 +0,0 @@
1
- __version__ = "1.2.56a3675.dev2"