upgini 1.2.71a3810.dev3__tar.gz → 1.2.71a3810.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/PKG-INFO +1 -1
- upgini-1.2.71a3810.dev4/src/upgini/__about__.py +1 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/date.py +20 -4
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/unary.py +2 -2
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/vector.py +9 -0
- upgini-1.2.71a3810.dev3/src/upgini/__about__.py +0 -1
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/.gitignore +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/LICENSE +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/README.md +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/pyproject.toml +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/dataset.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/errors.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/http.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/metadata.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/metrics.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/search_task.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/spinner.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.71a3810.dev3
|
|
3
|
+
Version: 1.2.71a3810.dev4
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.71a3810.dev4"
|
|
@@ -187,16 +187,21 @@ class DateListDiff(PandasOperator, DateDiffMixin, ParametrizedOperator):
|
|
|
187
187
|
class DateListDiffBounded(DateListDiff, ParametrizedOperator):
|
|
188
188
|
lower_bound: Optional[int] = None
|
|
189
189
|
upper_bound: Optional[int] = None
|
|
190
|
+
normalize: Optional[bool] = None
|
|
190
191
|
|
|
191
192
|
def to_formula(self) -> str:
|
|
192
193
|
lower_bound = "minusinf" if self.lower_bound is None else self.lower_bound
|
|
193
194
|
upper_bound = "plusinf" if self.upper_bound is None else self.upper_bound
|
|
194
|
-
|
|
195
|
+
norm = "_norm" if self.normalize else ""
|
|
196
|
+
return f"date_diff_{self.diff_unit}_{lower_bound}_{upper_bound}_{self.aggregation}{norm}"
|
|
195
197
|
|
|
196
198
|
@classmethod
|
|
197
199
|
def from_formula(cls, formula: str) -> Optional["DateListDiffBounded"]:
|
|
198
200
|
import re
|
|
199
201
|
|
|
202
|
+
normalize = formula.endswith("_norm")
|
|
203
|
+
formula = formula.replace("_norm", "")
|
|
204
|
+
|
|
200
205
|
pattern = r"^date_diff_([^_]+)_((minusinf|\d+))_((plusinf|\d+))_(\w+)$"
|
|
201
206
|
match = re.match(pattern, formula)
|
|
202
207
|
|
|
@@ -207,8 +212,13 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
|
|
|
207
212
|
lower_bound = None if match.group(2) == "minusinf" else int(match.group(2))
|
|
208
213
|
upper_bound = None if match.group(4) == "plusinf" else int(match.group(4))
|
|
209
214
|
aggregation = match.group(6)
|
|
210
|
-
|
|
211
|
-
|
|
215
|
+
return cls(
|
|
216
|
+
diff_unit=diff_unit,
|
|
217
|
+
lower_bound=lower_bound,
|
|
218
|
+
upper_bound=upper_bound,
|
|
219
|
+
aggregation=aggregation,
|
|
220
|
+
normalize=normalize,
|
|
221
|
+
)
|
|
212
222
|
|
|
213
223
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
214
224
|
res = super().get_params()
|
|
@@ -216,14 +226,20 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
|
|
|
216
226
|
res["lower_bound"] = str(self.lower_bound)
|
|
217
227
|
if self.upper_bound is not None:
|
|
218
228
|
res["upper_bound"] = str(self.upper_bound)
|
|
229
|
+
if self.normalize is not None:
|
|
230
|
+
res["normalize"] = str(self.normalize)
|
|
219
231
|
return res
|
|
220
232
|
|
|
221
233
|
def _agg(self, x):
|
|
234
|
+
orig_len = len(x)
|
|
222
235
|
x = x[
|
|
223
236
|
(x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
|
|
224
237
|
& (x < (self.upper_bound if self.upper_bound is not None else np.inf))
|
|
225
238
|
]
|
|
226
|
-
|
|
239
|
+
agg_res = super()._agg(x)
|
|
240
|
+
if self.normalize and orig_len > 0:
|
|
241
|
+
return agg_res / orig_len
|
|
242
|
+
return agg_res
|
|
227
243
|
|
|
228
244
|
|
|
229
245
|
class DatePercentileBase(PandasOperator, abc.ABC):
|
|
@@ -160,12 +160,12 @@ class Embeddings(PandasOperator):
|
|
|
160
160
|
class Bin(PandasOperator):
|
|
161
161
|
name: str = "bin"
|
|
162
162
|
is_unary: bool = True
|
|
163
|
-
output_type: Optional[str] = "
|
|
163
|
+
output_type: Optional[str] = "category"
|
|
164
164
|
bin_bounds: List[int] = []
|
|
165
165
|
is_categorical: bool = True
|
|
166
166
|
|
|
167
167
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
168
|
-
return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype(
|
|
168
|
+
return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype("category")
|
|
169
169
|
|
|
170
170
|
def _bin(self, f, bounds):
|
|
171
171
|
if f is None or np.isnan(f):
|
|
@@ -22,3 +22,12 @@ class Sum(PandasOperator, VectorizableMixin):
|
|
|
22
22
|
|
|
23
23
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
24
24
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Vectorize(PandasOperator, VectorizableMixin):
|
|
28
|
+
name: str = "vectorize"
|
|
29
|
+
is_vector: bool = True
|
|
30
|
+
group_index: int = 0
|
|
31
|
+
|
|
32
|
+
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
33
|
+
return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.71a3810.dev3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/autofe/timeseries/volatility.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/normalizer/normalize_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.71a3810.dev3 → upgini-1.2.71a3810.dev4}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|