upgini 1.2.56a3818.dev1__tar.gz → 1.2.56a3818.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/PKG-INFO +1 -1
  2. upgini-1.2.56a3818.dev2/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/vector.py +71 -10
  4. upgini-1.2.56a3818.dev1/src/upgini/__about__.py +0 -1
  5. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/.gitignore +0 -0
  6. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/LICENSE +0 -0
  7. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/README.md +0 -0
  8. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/pyproject.toml +0 -0
  9. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/all_operands.py +0 -0
  15. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/operand.py +0 -0
  20. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/autofe/unary.py +0 -0
  21. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/data_source/__init__.py +0 -0
  22. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/data_source/data_source_publisher.py +0 -0
  23. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/dataset.py +0 -0
  24. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/errors.py +0 -0
  25. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/features_enricher.py +0 -0
  26. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/http.py +0 -0
  27. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/lazy_import.py +0 -0
  28. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/mdc/__init__.py +0 -0
  29. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/mdc/context.py +0 -0
  30. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/metadata.py +0 -0
  31. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/metrics.py +0 -0
  32. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/normalizer/__init__.py +0 -0
  33. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/normalizer/normalize_utils.py +0 -0
  34. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/resource_bundle/__init__.py +0 -0
  35. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/resource_bundle/exceptions.py +0 -0
  36. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/resource_bundle/strings.properties +0 -0
  37. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  38. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/sampler/__init__.py +0 -0
  39. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/sampler/base.py +0 -0
  40. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/sampler/random_under_sampler.py +0 -0
  41. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/sampler/utils.py +0 -0
  42. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/search_task.py +0 -0
  43. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/spinner.py +0 -0
  44. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  45. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/feature_info.py +0 -0
  57. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/features_validator.py +0 -0
  58. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/format.py +0 -0
  59. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/ip_utils.py +0 -0
  60. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.56a3818.dev1 → upgini-1.2.56a3818.dev2}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.56a3818.dev1
3
+ Version: 1.2.56a3818.dev2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.56a3818.dev2"
@@ -54,20 +54,31 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
54
54
  ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
55
55
  ts.set_index(date.name, inplace=True)
56
56
  ts = ts[ts.index.notna()].sort_index()
57
- if self.offset_size > 0:
58
- ts = ts.iloc[:, :-1].merge(
59
- ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
60
- left_index=True,
61
- right_index=True,
62
- how="left",
63
- )
64
- ts = ts.groupby([c.name for c in data[1:-1]]) if len(data) > 2 else ts
57
+ ts = (
58
+ ts.groupby([c.name for c in data[1:-1]])
59
+ .apply(self._shift)[data[-1].name]
60
+ .to_frame()
61
+ .reset_index()
62
+ .set_index(date.name)
63
+ .groupby([c.name for c in data[1:-1]])
64
+ if len(data) > 2
65
+ else self._shift(ts)
66
+ )
65
67
  ts = self._aggregate(ts)
66
68
  ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
67
69
  ts.index = date.index
68
70
 
69
71
  return ts.iloc[:, -1]
70
72
 
73
+ def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
74
+ if self.offset_size > 0:
75
+ return ts.iloc[:, :-1].merge(
76
+ ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
77
+ left_index=True,
78
+ right_index=True,
79
+ )
80
+ return ts
81
+
71
82
  @abc.abstractmethod
72
83
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
73
84
  pass
@@ -93,12 +104,35 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
93
104
  )
94
105
 
95
106
  def to_formula(self) -> str:
96
- return f"roll_{self.window_size}{self.window_unit}_{self.aggregation}"
107
+ roll_component = f"roll_{self.window_size}{self.window_unit}"
108
+ if self.offset_size > 0:
109
+ roll_component += f"_offset_{self.offset_size}{self.offset_unit}"
110
+ return f"{roll_component}_{self.aggregation}"
97
111
 
98
112
  @classmethod
99
113
  def from_formula(cls, formula: str) -> Optional["Roll"]:
100
114
  import re
101
115
 
116
+ # Try matching pattern with offset first
117
+ pattern_with_offset = r"^roll_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])_(\w+)$"
118
+ match_with_offset = re.match(pattern_with_offset, formula)
119
+
120
+ if match_with_offset:
121
+ window_size = int(match_with_offset.group(1))
122
+ window_unit = match_with_offset.group(2)
123
+ offset_size = int(match_with_offset.group(3))
124
+ offset_unit = match_with_offset.group(4)
125
+ aggregation = match_with_offset.group(5)
126
+
127
+ return cls(
128
+ window_size=window_size,
129
+ window_unit=window_unit,
130
+ offset_size=offset_size,
131
+ offset_unit=offset_unit,
132
+ aggregation=aggregation,
133
+ )
134
+
135
+ # If no offset pattern found, try basic pattern
102
136
  pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
103
137
  match = re.match(pattern, formula)
104
138
 
@@ -133,12 +167,33 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
133
167
  lag_unit: str = "D"
134
168
 
135
169
  def to_formula(self) -> str:
136
- return f"lag_{self.lag_size}{self.lag_unit}"
170
+ lag_component = f"lag_{self.lag_size}{self.lag_unit}"
171
+ if self.offset_size > 0:
172
+ lag_component += f"_offset_{self.offset_size}{self.offset_unit}"
173
+ return lag_component
137
174
 
138
175
  @classmethod
139
176
  def from_formula(cls, formula: str) -> Optional["Lag"]:
140
177
  import re
141
178
 
179
+ # Try matching pattern with offset first
180
+ pattern_with_offset = r"^lag_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])$"
181
+ match_with_offset = re.match(pattern_with_offset, formula)
182
+
183
+ if match_with_offset:
184
+ lag_size = int(match_with_offset.group(1))
185
+ lag_unit = match_with_offset.group(2)
186
+ offset_size = int(match_with_offset.group(3))
187
+ offset_unit = match_with_offset.group(4)
188
+
189
+ return cls(
190
+ lag_size=lag_size,
191
+ lag_unit=lag_unit,
192
+ offset_size=offset_size,
193
+ offset_unit=offset_unit,
194
+ )
195
+
196
+ # If no offset pattern found, try basic pattern
142
197
  pattern = r"^lag_(\d+)([a-zA-Z])$"
143
198
  match = re.match(pattern, formula)
144
199
 
@@ -152,6 +207,12 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
152
207
 
153
208
  def get_params(self) -> Dict[str, Optional[str]]:
154
209
  res = super().get_params()
210
+ res.update(
211
+ {
212
+ "lag_size": self.lag_size,
213
+ "lag_unit": self.lag_unit,
214
+ }
215
+ )
155
216
  return res
156
217
 
157
218
  def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
@@ -1 +0,0 @@
1
- __version__ = "1.2.56a3818.dev1"