upgini 1.2.56a3675.dev1__tar.gz → 1.2.56a3675.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/PKG-INFO +1 -1
- upgini-1.2.56a3675.dev3/src/upgini/__about__.py +1 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/feature.py +0 -9
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/vector.py +82 -5
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/features_enricher.py +1 -1
- upgini-1.2.56a3675.dev1/src/upgini/__about__.py +0 -1
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/.gitignore +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/LICENSE +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/README.md +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/pyproject.toml +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/ads.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/dataset.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/errors.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/http.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/metadata.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/metrics.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/search_task.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/spinner.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.56a3675.dev1
|
|
3
|
+
Version: 1.2.56a3675.dev3
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.56a3675.dev3"
|
|
@@ -29,15 +29,6 @@ class Column:
|
|
|
29
29
|
self.name = mapping.get(self.name) or self.name
|
|
30
30
|
return self
|
|
31
31
|
|
|
32
|
-
def _unhash(self, feature_name: str) -> str:
|
|
33
|
-
last_component_idx = feature_name.rfind("_")
|
|
34
|
-
if not feature_name.startswith("f_"):
|
|
35
|
-
return feature_name # etalon feature
|
|
36
|
-
elif last_component_idx == 1:
|
|
37
|
-
return feature_name[2:] # fully hashed name, cannot unhash
|
|
38
|
-
else:
|
|
39
|
-
return feature_name[2:last_component_idx]
|
|
40
|
-
|
|
41
32
|
def delete_data(self):
|
|
42
33
|
self.data = None
|
|
43
34
|
|
|
@@ -2,7 +2,11 @@ import abc
|
|
|
2
2
|
from typing import Dict, List, Optional
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
|
-
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from pydantic import field_validator as validator # V2
|
|
8
|
+
except ImportError:
|
|
9
|
+
from pydantic import validator # V1
|
|
6
10
|
|
|
7
11
|
from upgini.autofe.operand import PandasOperand, ParametrizedOperand, VectorizableMixin
|
|
8
12
|
|
|
@@ -29,12 +33,16 @@ class Sum(PandasOperand, VectorizableMixin):
|
|
|
29
33
|
class TimeSeriesBase(PandasOperand, abc.ABC):
|
|
30
34
|
is_vector: bool = True
|
|
31
35
|
date_unit: Optional[str] = None
|
|
36
|
+
offset_size: int = 0
|
|
37
|
+
offset_unit: str = "D"
|
|
32
38
|
|
|
33
39
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
34
40
|
res = super().get_params()
|
|
35
41
|
res.update(
|
|
36
42
|
{
|
|
37
43
|
"date_unit": self.date_unit,
|
|
44
|
+
"offset_size": self.offset_size,
|
|
45
|
+
"offset_unit": self.offset_unit,
|
|
38
46
|
}
|
|
39
47
|
)
|
|
40
48
|
return res
|
|
@@ -46,13 +54,31 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
|
|
|
46
54
|
ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
|
|
47
55
|
ts.set_index(date.name, inplace=True)
|
|
48
56
|
ts = ts[ts.index.notna()].sort_index()
|
|
49
|
-
ts =
|
|
57
|
+
ts = (
|
|
58
|
+
ts.groupby([c.name for c in data[1:-1]])
|
|
59
|
+
.apply(self._shift)[data[-1].name]
|
|
60
|
+
.to_frame()
|
|
61
|
+
.reset_index()
|
|
62
|
+
.set_index(date.name)
|
|
63
|
+
.groupby([c.name for c in data[1:-1]])
|
|
64
|
+
if len(data) > 2
|
|
65
|
+
else self._shift(ts)
|
|
66
|
+
)
|
|
50
67
|
ts = self._aggregate(ts)
|
|
51
68
|
ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
|
|
52
69
|
ts.index = date.index
|
|
53
70
|
|
|
54
71
|
return ts.iloc[:, -1]
|
|
55
72
|
|
|
73
|
+
def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
74
|
+
if self.offset_size > 0:
|
|
75
|
+
return ts.iloc[:, :-1].merge(
|
|
76
|
+
ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
|
|
77
|
+
left_index=True,
|
|
78
|
+
right_index=True,
|
|
79
|
+
)
|
|
80
|
+
return ts
|
|
81
|
+
|
|
56
82
|
@abc.abstractmethod
|
|
57
83
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
58
84
|
pass
|
|
@@ -67,6 +93,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
|
67
93
|
window_unit: str = "D"
|
|
68
94
|
|
|
69
95
|
@validator("window_unit")
|
|
96
|
+
@classmethod
|
|
70
97
|
def validate_window_unit(cls, v: str) -> str:
|
|
71
98
|
try:
|
|
72
99
|
pd.tseries.frequencies.to_offset(v)
|
|
@@ -77,12 +104,35 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
|
77
104
|
)
|
|
78
105
|
|
|
79
106
|
def to_formula(self) -> str:
|
|
80
|
-
|
|
107
|
+
roll_component = f"roll_{self.window_size}{self.window_unit}"
|
|
108
|
+
if self.offset_size > 0:
|
|
109
|
+
roll_component += f"_offset_{self.offset_size}{self.offset_unit}"
|
|
110
|
+
return f"{roll_component}_{self.aggregation}"
|
|
81
111
|
|
|
82
112
|
@classmethod
|
|
83
113
|
def from_formula(cls, formula: str) -> Optional["Roll"]:
|
|
84
114
|
import re
|
|
85
115
|
|
|
116
|
+
# Try matching pattern with offset first
|
|
117
|
+
pattern_with_offset = r"^roll_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])_(\w+)$"
|
|
118
|
+
match_with_offset = re.match(pattern_with_offset, formula)
|
|
119
|
+
|
|
120
|
+
if match_with_offset:
|
|
121
|
+
window_size = int(match_with_offset.group(1))
|
|
122
|
+
window_unit = match_with_offset.group(2)
|
|
123
|
+
offset_size = int(match_with_offset.group(3))
|
|
124
|
+
offset_unit = match_with_offset.group(4)
|
|
125
|
+
aggregation = match_with_offset.group(5)
|
|
126
|
+
|
|
127
|
+
return cls(
|
|
128
|
+
window_size=window_size,
|
|
129
|
+
window_unit=window_unit,
|
|
130
|
+
offset_size=offset_size,
|
|
131
|
+
offset_unit=offset_unit,
|
|
132
|
+
aggregation=aggregation,
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# If no offset pattern found, try basic pattern
|
|
86
136
|
pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
|
|
87
137
|
match = re.match(pattern, formula)
|
|
88
138
|
|
|
@@ -107,7 +157,7 @@ class Roll(TimeSeriesBase, ParametrizedOperand):
|
|
|
107
157
|
return res
|
|
108
158
|
|
|
109
159
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
110
|
-
return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=
|
|
160
|
+
return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=1).agg(
|
|
111
161
|
_roll_aggregations.get(self.aggregation, self.aggregation)
|
|
112
162
|
)
|
|
113
163
|
|
|
@@ -117,12 +167,33 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
|
|
|
117
167
|
lag_unit: str = "D"
|
|
118
168
|
|
|
119
169
|
def to_formula(self) -> str:
|
|
120
|
-
|
|
170
|
+
lag_component = f"lag_{self.lag_size}{self.lag_unit}"
|
|
171
|
+
if self.offset_size > 0:
|
|
172
|
+
lag_component += f"_offset_{self.offset_size}{self.offset_unit}"
|
|
173
|
+
return lag_component
|
|
121
174
|
|
|
122
175
|
@classmethod
|
|
123
176
|
def from_formula(cls, formula: str) -> Optional["Lag"]:
|
|
124
177
|
import re
|
|
125
178
|
|
|
179
|
+
# Try matching pattern with offset first
|
|
180
|
+
pattern_with_offset = r"^lag_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])$"
|
|
181
|
+
match_with_offset = re.match(pattern_with_offset, formula)
|
|
182
|
+
|
|
183
|
+
if match_with_offset:
|
|
184
|
+
lag_size = int(match_with_offset.group(1))
|
|
185
|
+
lag_unit = match_with_offset.group(2)
|
|
186
|
+
offset_size = int(match_with_offset.group(3))
|
|
187
|
+
offset_unit = match_with_offset.group(4)
|
|
188
|
+
|
|
189
|
+
return cls(
|
|
190
|
+
lag_size=lag_size,
|
|
191
|
+
lag_unit=lag_unit,
|
|
192
|
+
offset_size=offset_size,
|
|
193
|
+
offset_unit=offset_unit,
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
# If no offset pattern found, try basic pattern
|
|
126
197
|
pattern = r"^lag_(\d+)([a-zA-Z])$"
|
|
127
198
|
match = re.match(pattern, formula)
|
|
128
199
|
|
|
@@ -136,6 +207,12 @@ class Lag(TimeSeriesBase, ParametrizedOperand):
|
|
|
136
207
|
|
|
137
208
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
138
209
|
res = super().get_params()
|
|
210
|
+
res.update(
|
|
211
|
+
{
|
|
212
|
+
"lag_size": self.lag_size,
|
|
213
|
+
"lag_unit": self.lag_unit,
|
|
214
|
+
}
|
|
215
|
+
)
|
|
139
216
|
return res
|
|
140
217
|
|
|
141
218
|
def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -3733,7 +3733,7 @@ if response.status_code == 200:
|
|
|
3733
3733
|
features_info_without_links = []
|
|
3734
3734
|
internal_features_info = []
|
|
3735
3735
|
|
|
3736
|
-
original_shaps = {fm.name: fm.shap_value for fm in features_meta}
|
|
3736
|
+
original_shaps = {original_names_dict.get(fm.name, fm.name): fm.shap_value for fm in features_meta}
|
|
3737
3737
|
|
|
3738
3738
|
if updated_shaps is not None:
|
|
3739
3739
|
for fm in features_meta:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.56a3675.dev1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/normalizer/normalize_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.56a3675.dev1 → upgini-1.2.56a3675.dev3}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|